#Code used to analyse and plot data for Allen et al. (2020) # doi: 10.1098/rspb.2020.1125 ########## Palaeorotate points ########## #Rotated using PALEOMAP global plate models as follows: #Wuchiapingian & Changhsingian -> Map #50 "Late Permian(Lopingian, 255.7Ma)" #Induan -> Map #49 "Permo-Triassic boundary (251Ma)" #Olenekian -> Map #48 "Early Triassic (Induan & Olenekian, 248.5Ma)" #Anisian -> Map #47 "Middle Triassic (Anisian, 241.5Ma)" #Ladinian -> Map #46 "Middle Triassic (Ladinian, 232.9Ma)" #setwd("#####") #Load packages library(tidyverse) library(sp) library(rgdal) library(rgeos) library(maptools) ###Part 1. Convert modern lat-longs to a shape file #Read in csv file with the present-day lat/long coordinates dat <- read.csv("data/tetrapod_database_final.csv", stringsAsFactors = FALSE) dat <- data.frame(dat) #Lopingian #Create data frame with collection numbers, longs, lats LopingianFilter <- filter(dat, stage_assignment == "Wuchiapingian" | stage_assignment == "Changhsingian") LopingianPoints <- LopingianFilter[c("collection_no", "lng", "lat")] #Filter to unique points (one per collection) LopingianPoints <- distinct(LopingianPoints, collection_no, .keep_all = T) Lopingian_s2xy <- LopingianPoints[c("lng", "lat")] #Label as coordinates coordinates(Lopingian_s2xy) <- ~ lng + lat #Convert to 'Spatial Points Data Frame' Lopingian_shp <- SpatialPointsDataFrame(Lopingian_s2xy, LopingianPoints, proj4string = CRS("+proj=longlat +datum=WGS84")) #Write shape file writeOGR(Lopingian_shp, "data/Modern shapefiles", "Lopingian", driver="ESRI Shapefile") #Induan #Create data frame with collection numbers, longs, lats InduanFilter <- filter(dat, stage_assignment == "Induan") InduanPoints <- InduanFilter[c("collection_no", "lng", "lat")] #Filter to unique points (one per collection) InduanPoints <- distinct(InduanPoints, collection_no, .keep_all = T) Induan_s2xy <- InduanPoints[c("lng", "lat")] #Label as coordinates coordinates(Induan_s2xy) <- ~ lng + lat #Convert to 'Spatial Points Data Frame' Induan_shp <- SpatialPointsDataFrame(Induan_s2xy, InduanPoints, proj4string = CRS("+proj=longlat +datum=WGS84")) #Write shape file writeOGR(Induan_shp, "data/Modern shapefiles", "Induan", driver="ESRI Shapefile") #Olenekian #Create data frame with collection numbers, longs, lats OlenekianFilter <- filter(dat, stage_assignment == "Olenekian") OlenekianPoints <- OlenekianFilter[c("collection_no", "lng", "lat")] #Filter to unique points (one per collection) OlenekianPoints <- distinct(OlenekianPoints, collection_no, .keep_all = T) Olenekian_s2xy <- OlenekianPoints[c("lng", "lat")] #Label as coordinates coordinates(Olenekian_s2xy) <- ~ lng + lat #Convert to 'Spatial Points Data Frame' Olenekian_shp <- SpatialPointsDataFrame(Olenekian_s2xy, OlenekianPoints, proj4string = CRS("+proj=longlat +datum=WGS84")) #Write shape file writeOGR(Olenekian_shp, "data/Modern shapefiles", "Olenekian", driver="ESRI Shapefile") #Anisian #Create data frame with collection numbers, longs, lats AnisianFilter <- filter(dat, stage_assignment == "Anisian") AnisianPoints <- AnisianFilter[c("collection_no", "lng", "lat")] #Filter to unique points (one per collection) AnisianPoints <- distinct(AnisianPoints, collection_no, .keep_all = T) Anisian_s2xy <- AnisianPoints[c("lng", "lat")] #Label as coordinates coordinates(Anisian_s2xy) <- ~ lng + lat #Convert to 'Spatial Points Data Frame' Anisian_shp <- SpatialPointsDataFrame(Anisian_s2xy, AnisianPoints, proj4string = CRS("+proj=longlat +datum=WGS84")) #Write shape file writeOGR(Anisian_shp, "data/Modern shapefiles", "Anisian", driver="ESRI Shapefile") #Ladinian #Create data frame with collection numbers, longs, lats LadinianFilter <- filter(dat, stage_assignment == "Ladinian") LadinianPoints <- LadinianFilter[c("collection_no", "lng", "lat")] #Filter to unique points (one per collection) LadinianPoints <- distinct(LadinianPoints, collection_no, .keep_all = T) Ladinian_s2xy <- LadinianPoints[c("lng", "lat")] #Label as coordinates coordinates(Ladinian_s2xy) <- ~ lng + lat #Convert to 'Spatial Points Data Frame' Ladinian_shp <- SpatialPointsDataFrame(Ladinian_s2xy, LadinianPoints, proj4string = CRS("+proj=longlat +datum=WGS84")) #Write shape file writeOGR(Ladinian_shp, "data/Modern shapefiles", "Ladinian", driver="ESRI Shapefile") ### Rotate the points in GPlates ### ###Part 2. Convert shape file back to palaeo lat-longs #Read shape file, convert to data frame, write as csv dat <- read.csv("data/tetrapod_database_final.csv", stringsAsFactors = FALSE) dat <- data.frame(dat) #Lopingian LopingianFilter <- filter(dat, stage_assignment == "Wuchiapingian" | stage_assignment == "Changhsingian") LopingianPoints <- LopingianFilter[c("collection_no", "lng", "lat")] #Filter to unique points (one per collection) LopingianPoints <- distinct(LopingianPoints, collection_no, .keep_all = T) Lopingian_shp <- readOGR("data/Lopingian/reconstructed_255.70Ma.shp") Lopingian_dat <- data.frame(Lopingian_shp) Lopingian_s2xy <- Lopingian_dat[c("coords.x1", "coords.x2")] Lopingian_s2xy$collection_no <- LopingianPoints$collection_no #Induan InduanFilter <- filter(dat, stage_assignment == "Induan") InduanPoints <- InduanFilter[c("collection_no", "lng", "lat")] #Filter to unique points (one per collection) InduanPoints <- distinct(InduanPoints, collection_no, .keep_all = T) Induan_shp <- readOGR("data/Induan/reconstructed_251.00Ma.shp") Induan_dat <- data.frame(Induan_shp) Induan_s2xy <- Induan_dat[c("coords.x1", "coords.x2")] Induan_s2xy$collection_no <- InduanPoints$collection_no #Olenekian OlenekianFilter <- filter(dat, stage_assignment == "Olenekian") OlenekianPoints <- OlenekianFilter[c("collection_no", "lng", "lat")] #Filter to unique points (one per collection) OlenekianPoints <- distinct(OlenekianPoints, collection_no, .keep_all = T) Olenekian_shp <- readOGR("data/Olenekian/reconstructed_248.50Ma.shp") Olenekian_dat <- data.frame(Olenekian_shp) Olenekian_s2xy <- Olenekian_dat[c("coords.x1", "coords.x2")] Olenekian_s2xy$collection_no <- OlenekianPoints$collection_no #Anisian AnisianFilter <- filter(dat, stage_assignment == "Anisian") AnisianPoints <- AnisianFilter[c("collection_no", "lng", "lat")] #Filter to unique points (one per collection) AnisianPoints <- distinct(AnisianPoints, collection_no, .keep_all = T) Anisian_shp <- readOGR("data/Anisian/reconstructed_241.50Ma.shp") Anisian_dat <- data.frame(Anisian_shp) Anisian_s2xy <- Anisian_dat[c("coords.x1", "coords.x2")] Anisian_s2xy$collection_no <- AnisianPoints$collection_no #Ladinian LadinianFilter <- filter(dat, stage_assignment == "Ladinian") LadinianPoints <- LadinianFilter[c("collection_no", "lng", "lat")] #Filter to unique points (one per collection) LadinianPoints <- distinct(LadinianPoints, collection_no, .keep_all = T) Ladinian_shp <- readOGR("data/Ladinian/reconstructed_232.90Ma.shp") Ladinian_dat <- data.frame(Ladinian_shp) Ladinian_s2xy <- Ladinian_dat[c("coords.x1", "coords.x2")] Ladinian_s2xy$collection_no <- LadinianPoints$collection_no #Compile AllPoints <- rbind(Lopingian_s2xy, Induan_s2xy, Olenekian_s2xy, Anisian_s2xy, Ladinian_s2xy) #Write a csv of points #write.csv(AllPoints, "data/rotated_points.csv") #Match points to collection numbers new_db <- left_join(dat, AllPoints, by = "collection_no") #Write a new csv with the paleolat & long added write_csv(new_db, path = "data/tetrapod_database_final.csv") ########## Plot occurrences onto maps (Figure 1) ########## #setwd("#####") #Load packages library(tidyverse) library(jpeg) #Create a vector giving the chronological order of stages stages <- c("Wuchiapingian", "Changhsingian", "Induan", "Olenekian", "Anisian", "Ladinian", "Carnian") #Create a vector of stage midpoints midpoints <- c(256.6, 253, 251.6, 249.2, 244.6, 239.5, 232) #Create a vector giving the chronological order of substages substages <- c("Wuchiapingian", "Changhsingian", "Griesbachian", "Dienerian", "Smithian", "Spathian", "Aegean-Bithynian", "Pelsonian-Illyrian", "Fassanian", "Longobardian", "Julian", "Tuvalian") #Create a vector of substage midpoints submidpoints <- c(256.6, 253, 251.7, 251.4, 250.2, 248.2, 245.9, 243.3, 240.8, 238.3, 234.5, 229.5) #Read in dataset tetrapods <- read_csv("data/tetrapod_database_final.csv") glimpse(tetrapods) #Wrangling #Check no incorrect spelling #unique(tetrapods$stage_assignment) #unique(tetrapods$substage_assignment) #Remove synonymy repeats (combinations of the same collection no. AND accepted name) tetrapods <- distinct(tetrapods, accepted_name, collection_no, .keep_all = T) ###Plot using Scotese palaeo-rasters, LP/ET/MT### LopingianMap <- readJPEG("data/LP trace_shade.jpg") LopingianPoints <- filter(tetrapods, stage_assignment == "Wuchiapingian"| stage_assignment == "Changhsingian") ggplot(LopingianPoints, aes(x = Longitude, y = Latitude, group = pres_mode, colour = pres_mode)) + xlim(c(-180, 180)) + ylim(c(-90, 90)) + annotation_custom(grid::rasterGrob(LopingianMap, width=unit(0.9, "npc"), height=unit(0.9, "npc"), just = "centre")) + geom_segment(aes(x = -180, xend = 180, y = 0, yend = 0), linetype = "solid", colour = "black") + geom_point(size = 3) + scale_colour_manual(values = c("limegreen", "orange")) + guides(fill = F, colour = F) + theme_classic() ETMap <- readJPEG("data/ET trace_shade.jpg") ETPoints <- filter(tetrapods, stage_assignment == "Induan" | stage_assignment == "Olenekian") ggplot(ETPoints, aes(x = Longitude, y = Latitude, group = interaction(pres_mode, taxon_environment), colour = interaction(pres_mode, taxon_environment))) + xlim(c(-180, 180)) + ylim(c(-90, 90)) + annotation_custom(grid::rasterGrob(ETMap, width=unit(0.9, "npc"), height=unit(0.9, "npc"), just = "centre")) + geom_segment(aes(x = -180, xend = 180, y = 0, yend = 0), linetype = "solid", colour = "black") + geom_point(size = 3) + scale_colour_manual(values = c("blue", "limegreen", "orange")) + guides(fill = F, colour = F) + theme_classic() MTMap <- readJPEG("data/MT trace_shade.jpg") MTPoints <- filter(tetrapods, stage_assignment == "Anisian" | stage_assignment == "Ladinian") ggplot(MTPoints, aes(x = Longitude, y = Latitude, group = interaction(pres_mode, taxon_environment), colour = interaction(pres_mode, taxon_environment))) + xlim(c(-180, 180)) + ylim(c(-90, 90)) + annotation_custom(grid::rasterGrob(MTMap, width=unit(0.9, "npc"), height=unit(0.9, "npc"), just = "centre")) + geom_segment(aes(x = -180, xend = 180, y = 0, yend = 0), linetype = "solid", colour = "black") + geom_point(size = 3) + scale_colour_manual(values = c("blue", "limegreen", "orange")) + guides(fill = F, colour = F) + theme_classic() ########## Raw diversity by latitude bin (Figure 1) ########## #setwd("#####") #Load packages library(tidyverse) #Create a vector giving the chronological order of stages stages <- c("Wuchiapingian", "Changhsingian", "Induan", "Olenekian", "Anisian", "Ladinian", "Carnian") #Create a vector of stage midpoints midpoints <- c(256.6, 253, 251.6, 249.2, 244.6, 239.5, 232) #Read in dataset tetrapods <- read_csv("data/tetrapod_database_final.csv") glimpse(tetrapods) #Remember that there are only marine occurrences from the Olenekian onwards #Wrangling #Check no incorrect spelling #unique(tetrapods$stage_assignment) #unique(tetrapods$substage_assignment) #Remove synonymy repeats (combinations of the same collection no. AND accepted name) tetrapods <- distinct(tetrapods, accepted_name, collection_no, .keep_all = T) #Filter to occurrences with stage assignment tetrapods <- tetrapods %>% filter(!is.na(stage_assignment)) ###Latitude line graphs of richness### #Add column which indicates latitude bin bins <- seq(from = -90, to = 90, by = 20) labels <- seq(from = -80, to = 80, by = 20) tetrapods_lat <- mutate(tetrapods, paleolat_code = cut(Latitude, breaks = bins, labels = labels)) #Choose stage(s) for filter tetrapods_lat_stage <- tetrapods_lat %>% filter(stage_assignment == "Wuchiapingian") #tetrapods_lat_stage <- tetrapods_lat %>% filter(stage_assignment == "Wuchiapingian" | stage_assignment == "Changhsingian") #Filter unique taxa #Create an empty dataset with PBDB column names to collect unique occurrences in unique_by_bin <- tetrapods_lat_stage[FALSE,] #Loop through each palaeolatitude bin #For that bin, retain species occurrences, then retain unique taxa at gradually # higher taxonomic levels which are not already represented in that bin for (i in 1:(length(labels))) { print(i) one_bin <- tetrapods_lat_stage %>% filter(paleolat_code == labels[i]) for (j in 1:(nrow(one_bin))) { if (nrow(one_bin) != 0) if (one_bin$accepted_rank[j] == "species") unique_by_bin <- rbind(unique_by_bin, one_bin[j,]) else if (!is.na(one_bin$genus[j])) (if (one_bin$genus[j] %in% one_bin$genus[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) else if (!is.na(one_bin$family[j])) (if (one_bin$family[j] %in% one_bin$family[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) else if (!is.na(one_bin$order[j])) (if (one_bin$order[j] %in% one_bin$order[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) } } #Remove repeats of species names, to get one occurrence per unique species per bin unique_by_bin <- distinct(unique_by_bin, accepted_name, paleolat_code, .keep_all = T) #Subset data as trace vs. marine vs. terrestrial traces <- unique_by_bin %>% filter(pres_mode == "trace") %>% count(paleolat_code) marine <- unique_by_bin %>% filter(pres_mode == "body") %>% filter(taxon_environment == "marine") %>% count(paleolat_code) terrestrial <- unique_by_bin %>% filter(pres_mode == "body") %>% filter(taxon_environment == "terrestrial") %>% count(paleolat_code) #Fill in gaps for(i in 1:length(labels)) { if((labels[i] %in% traces$paleolat_code) == FALSE) traces <- rbind(traces, c(labels[i], 0)) } for(i in 1:length(labels)) { if((labels[i] %in% marine$paleolat_code) == FALSE) marine <- rbind(marine, c(labels[i], 0)) } for(i in 1:length(labels)) { if((labels[i] %in% terrestrial$paleolat_code) == FALSE) terrestrial <- rbind(terrestrial, c(labels[i], 0)) } #Reorder for ease of checking traces <- arrange(traces, desc(paleolat_code)) marine <- arrange(marine, desc(paleolat_code)) terrestrial <- arrange(terrestrial, desc(paleolat_code)) #May need to rename columns to enable merge #colnames(traces) <- c("paleolat_code","n") #colnames(marine) <- c("paleolat_code","n") #colnames(terrestrial) <- c("paleolat_code","n") #Combine into single data frame for plotting traces$type <- "Number of footprint occurrences" marine$type <- "Number of marine occurrences" terrestrial$type <- "Number of terrestrial occurrences" lat_counts <- rbind(traces, marine, terrestrial) #Convert to numbers for plotting #terrestrial$paleolat_code <- as.numeric(as.character(terrestrial$paleolat_code)) lat_counts$paleolat_code <- as.numeric(as.character(lat_counts$paleolat_code)) #Plot multiple fossil types ggplot(lat_counts, aes(x = paleolat_code, y = n, group = type, colour = type)) + geom_line(size = 1) + geom_point(size = 2) + labs(x = "Palaeolatitude", y = "Raw species richness count") + coord_flip() + scale_x_continuous(limits = c(-90, 90)) + geom_vline(aes(xintercept = 0), colour = "black") + scale_colour_manual(values = c("orange", "blue", "limegreen")) + theme_classic() #Plot one fossil type ggplot(terrestrial, aes(x = paleolat_code, y = n, group = 1)) + geom_line(size = 1, colour = "limegreen") + geom_point(size = 2, colour = "limegreen") + labs(x = "Palaeolatitude", y = "Raw species richness count") + coord_flip() + scale_x_continuous(limits = c(-90, 90)) + geom_vline(aes(xintercept = 0), colour = "black") + theme_classic() ########## Squares diversity by latitude graphs (Figure 1) ########## #setwd("#####") #Load packages library(tidyverse) #Read in dataset tetrapods <- read_csv("data/tetrapod_database_final.csv") glimpse(tetrapods) #Remove synonymy repeats (combinations of the same collection no. AND accepted name) tetrapods <- distinct(tetrapods, accepted_name, collection_no, .keep_all = T) #Trim data without substage assignment or species-level identification tetrapods_trim <- tetrapods %>% filter(!is.na(stage_assignment)) #Add column which indicates latitude bin bins <- seq(from = -90, to = 90, by = 20) labels <- seq(from = -80, to = 80, by = 20) tetrapods_trim <- mutate(tetrapods_trim, paleolat_code = cut(Latitude, breaks = bins, labels = labels)) #Filter data to body fossils, choose environment/time interval stage_tetrapods <- tetrapods_trim %>% filter(pres_mode == "body") %>% filter(stage_assignment == "Olenekian") #Filter unique taxa #Create an empty dataset with PBDB column names to collect unique occurrences in unique_by_bin <- stage_tetrapods[FALSE,] #Loop through each palaeolatitude bin #For that bin, retain species occurrences, then retain unique taxa at gradually # higher taxonomic levels which are not already represented in that bin for (i in 1:(length(labels))) { print(i) one_bin <- stage_tetrapods %>% filter(paleolat_code == labels[i]) for (j in 1:(nrow(one_bin))) { if (nrow(one_bin) != 0) if (one_bin$accepted_rank[j] == "species") unique_by_bin <- rbind(unique_by_bin, one_bin[j,]) else if (!is.na(one_bin$genus[j])) (if (one_bin$genus[j] %in% one_bin$genus[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) else if (!is.na(one_bin$family[j])) (if (one_bin$family[j] %in% one_bin$family[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) else if (!is.na(one_bin$order[j])) (if (one_bin$order[j] %in% one_bin$order[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) } } ##Terrestrial## #Filter terrestrial fossils terrestrial <- filter(unique_by_bin, taxon_environment == "terrestrial") #Generate list of frequencies by stage #Achieved by using and trimming the 'count' function in dplyr, across a loop of stage names terres_lat_freq <- list() for (i in 1:length(labels)) { t_list <- terrestrial %>% filter(paleolat_code == labels[i]) %>% count(., accepted_name) %>% arrange(desc(n)) %>% select(n) t_list <- unlist(t_list, use.names = F) terres_lat_freq[[i]] <- t_list } names(terres_lat_freq) <- labels glimpse(terres_lat_freq) #Estimate diversity using squares method (Alroy, 2018) #Create an empty vector to store the diversity estimates t_squares_list <- vector("numeric", length = 0) #Loop through bins, estimating squares and appending to vector for(i in 1:length(terres_lat_freq)) { t_freq_list <- terres_lat_freq[[i]] if(is.na(t_freq_list[1])){t_freq_list <- 0} #convert empty bin NA to 0 if(t_freq_list[1] == 0){t_squares <- 0} else { t_sp_count <- length(t_freq_list) t_sing_count <- sum(t_freq_list == 1) t_ind_count <- sum(t_freq_list) t_sum_nsq <- sum(t_freq_list^2) t_squares <- t_sp_count + (((t_sing_count^2)*t_sum_nsq)/((t_ind_count^2) - (t_sing_count*t_sp_count))) if(t_squares == Inf){t_squares <- length(t_freq_list)} #too many singletons produces infinite estimate, corrects to observed abundance } t_squares_list <- append(t_squares_list, t_squares) } #Add bin labels to squares estimates t_to_plot <- data.frame(labels, squares = t_squares_list) ##Marine## #Filter marine fossils marine <- filter(unique_by_bin, taxon_environment == "marine") #Generate list of frequencies by stage #Achieved by using and trimming the 'count' function in dplyr, across a loop of stage names marine_lat_freq <- list() for (i in 1:length(labels)) { m_list <- marine %>% filter(paleolat_code == labels[i]) %>% count(., accepted_name) %>% arrange(desc(n)) %>% select(n) m_list <- unlist(m_list, use.names = F) marine_lat_freq[[i]] <- m_list } names(marine_lat_freq) <- labels glimpse(marine_lat_freq) #Estimate diversity using squares method (Alroy, 2018) m_squares_list <- vector("numeric", length = 0) for(i in 1:length(marine_lat_freq)) { m_freq_list <- marine_lat_freq[[i]] if(is.na(m_freq_list[1])){m_freq_list <- 0} if(m_freq_list[1] == 0){m_squares <- 0} else { m_sp_count <- length(m_freq_list) m_sing_count <- sum(m_freq_list == 1) m_ind_count <- sum(m_freq_list) m_sum_nsq <- sum(m_freq_list^2) m_squares <- m_sp_count + (((m_sing_count^2)*m_sum_nsq)/((m_ind_count^2) - (m_sing_count*m_sp_count))) if(m_squares == Inf){m_squares <- length(m_freq_list)} } m_squares_list <- append(m_squares_list, m_squares) } m_to_plot <- data.frame(labels, squares = m_squares_list) #Combine terrestrial and marine estimate dataframes for plotting t_to_plot$type <- "Terrestrial" m_to_plot$type <- "Marine" all_to_plot <- rbind(t_to_plot, m_to_plot) #Plot ggplot(all_to_plot, aes(x = labels, y = squares, group = type, colour = type)) + geom_line(size = 1.5) + geom_point(size = 2) + labs(x = "Palaeolatitude", y = "Subsampled diversity") + coord_flip() + scale_x_continuous(limits = c(-90, 90)) + scale_colour_manual(values = c("blue", "limegreen")) + geom_vline(aes(xintercept = 0), colour = "black", size = 0.7) + theme_classic() ########## SQS diversity by latitude graphs (Figures 1 & 2) ########## #setwd("#####") #Load packages library(tidyverse) library(iNEXT) #Read in dataset tetrapods <- read_csv("data/tetrapod_database_final.csv") glimpse(tetrapods) #Remove synonymy repeats (combinations of the same collection no. AND accepted name) tetrapods <- distinct(tetrapods, accepted_name, collection_no, .keep_all = T) #Trim data without stage assignment tetrapods_trim <- tetrapods %>% filter(!is.na(stage_assignment)) #Trim to body fossils tetrapods_trim <- filter(tetrapods_trim, pres_mode == "body") #Add column which indicates latitude bin bins <- seq(from = -90, to = 90, by = 20) labels <- seq(from = -80, to = 80, by = 20) tetrapods_trim <- mutate(tetrapods_trim, paleolat_code = cut(Latitude, breaks = bins, labels = labels)) #Filter data, choose taxon environment and stage stage_tetrapods <- tetrapods_trim %>% filter(stage_assignment == "Ladinian") %>% filter(taxon_environment == "marine") #Filter unique taxa #Create an empty dataset with PBDB column names to collect unique occurrences in unique_by_bin <- stage_tetrapods[FALSE,] #Loop through each palaeolatitude bin #For that bin, retain species occurrences, then retain unique taxa at gradually # higher taxonomic levels which are not already represented in that bin for (i in 1:(length(labels))) { print(i) one_bin <- stage_tetrapods %>% filter(paleolat_code == labels[i]) for (j in 1:(nrow(one_bin))) { if (nrow(one_bin) != 0) if (one_bin$accepted_rank[j] == "species") unique_by_bin <- rbind(unique_by_bin, one_bin[j,]) else if (!is.na(one_bin$genus[j])) (if (one_bin$genus[j] %in% one_bin$genus[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) else if (!is.na(one_bin$family[j])) (if (one_bin$family[j] %in% one_bin$family[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) else if (!is.na(one_bin$order[j])) (if (one_bin$order[j] %in% one_bin$order[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) } } #Generate a table of sample sizes by latitude bin, ordered numerically, and remove NAs stage_totals <- count(unique_by_bin, paleolat_code) for(i in 1:length(labels)) { if((labels[i] %in% stage_totals$paleolat_code) == FALSE) stage_totals <- rbind(stage_totals, c(labels[i], 1)) } #Reorder for ease of checking stage_totals <- arrange(stage_totals, paleolat_code) stage_totals_list <- pull(stage_totals, n) #Generate list of frequencies by stage, starting with total value, as needed by iNEXT #Achieved by using and trimming the 'count' function in dplyr, across a loop of stage names lat_freq <- list() for (i in 1:length(labels)) { lat_list <- unique_by_bin %>% filter(paleolat_code == labels[i]) %>% count(., accepted_name) %>% arrange(desc(n)) %>% add_row(n = sum(.$n), .before = 1) %>% select(n) lat_list <- unlist(lat_list, use.names = F) if(lat_list[1] < 3){lat_list <- NA} #if the list is shorter than 3 (2 species), make NA lat_freq[[i]] <- lat_list } names(lat_freq) <- labels glimpse(lat_freq) #Filter out empty lists lat_freq <- lat_freq[!is.na(lat_freq)] ###Code from Dunne et al. 2018### #"1 Coverage-standardised diversity curve" #Set quorum levels quorum_levels <- round(seq(from = 0.3, to = 0.8, by = 0.1), 1) #Estimate D using estimateD in iNEXT estD_list <- list() for(i in 1:length(quorum_levels)) { estD <- estimateD(lat_freq, datatype = "incidence_freq", base = "coverage", level = quorum_levels[i]) estD <- filter(estD, order == 0) #filter for species richness values estD$quorum_level <- quorum_levels[i] estD$reference_t <- stage_totals_list[(match(names(lat_freq), stage_totals$paleolat_code))] #add sample sizes in extra column estD$midpoints <- names(lat_freq) estD_list[[i]] <- estD } #Bind the individual dataframes into one estD_list <- bind_rows(estD_list) #Remove values when t is more than three times the sample size estD_list[which(estD_list$t >= 3 * estD_list$reference_t), c("qD", "qD.LCL", "qD.UCL")] <- rep(NA, 3) #no more than twice reference sample size View(estD_list) #Add empty bins as a plot point for(i in 1:length(labels)){ if((labels[i] %in% estD_list$site) == FALSE) estD_list <- rbind(estD_list, c(rep(NA, 5), 0, NA, NA, 0.4, NA, as.numeric(labels[i]))) } #Make quorum level a factor estD_list$midpoints <- as.numeric(as.character(estD_list$midpoints)) estD_list$quorum_level <- as.factor(estD_list$quorum_level) #Used "chartreuse" and "dodgerblue" colour sets colours <- c("dodgerblue1", "dodgerblue2", "dodgerblue3", "dodgerblue4") #Plot quorum levels 0.4 - 0.7 ggplot(filter(estD_list, quorum_level %in% quorum_levels[2:5]), aes(x = midpoints, y = qD, ymin = qD.LCL, ymax = qD.UCL, group = quorum_level, colour = quorum_level)) + geom_line(size = 1.5) + geom_point(size = 2) + geom_linerange(size = 1) + labs(x = "Palaeolatitude", y = "Subsampled diversity", colour = "Quorum level") + coord_flip() + scale_x_continuous(limits = c(-90, 90)) + geom_vline(aes(xintercept = 0), colour = "black", size = 0.7) + scale_colour_manual(values = colours) + theme_classic() ########## Wrangle collections and formations data (Figure S1) ########## #setwd("#####") #Load packages library(tidyverse) #Create a vector giving the chronological order of stages stages <- c("Wuchiapingian", "Changhsingian", "Induan", "Olenekian", "Anisian", "Ladinian", "Carnian", "Norian") #Create a vector of stage midpoints midpoints <- c(256.6, 253, 251.6, 249.2, 244.6, 239.5, 232, 217.8) #Create a vector giving the chronological order of substages substages <- c("Wuchiapingian", "Changhsingian", "Griesbachian", "Dienerian", "Smithian", "Spathian", "Aegean-Bithynian", "Pelsonian-Illyrian", "Fassanian", "Longobardian", "Julian", "Tuvalian", "Lacian", "Alaunian", "Sevatian") #Create a vector of substage midpoints submidpoints <- c(256.6, 253, 251.7, 251.4, 250.2, 248.2, 245.9, 243.3, 240.8, 238.3, 234.5, 229.5, 221.3, 213.8, 210.3) #Read in dataset collections <- read_csv("data/P-T collections.csv") View(collections) #Retain only collections with substage dating collsfilt <- collections %>% filter(is.na(late_interval)) %>% filter(early_interval %in% substages) #Create table of collection numbers per substage coll_counts <- count(collsfilt, early_interval) coll_counts <- coll_counts[match(substages, coll_counts$early_interval),] coll_counts$midpoints <- submidpoints coll_counts <- drop_na(coll_counts, n) write_csv(coll_counts, "data/P-T collection counts.csv") #Create table of formation numbers per substage form_counts <- collsfilt %>% filter(!is.na(formation)) %>% group_by(early_interval) %>% summarize(n = n_distinct(formation)) form_counts <- form_counts[match(substages, form_counts$early_interval),] form_counts$midpoints <- submidpoints form_counts <- drop_na(form_counts, n) write_csv(form_counts, "data/P-T formation counts.csv") ########## Plot collections and formations data (Figure S1) ########## #setwd("#####") #Load packages library(tidyverse) #Read in dataset tetrapods <- read_csv("data/tetrapod_database_ScotesePLL.csv") glimpse(tetrapods) #Wrangling #Check no incorrect spelling #unique(tetrapods$stage_assignment) #unique(tetrapods$substage_assignment) #Remove synonymy repeats (combinations of the same collection no. AND accepted name) tetrapods <- distinct(tetrapods, accepted_name, collection_no, .keep_all = T) #Remove occurrences without substage-level temporal resolution tetrapods <- filter(tetrapods, !is.na(substage_assignment)) ###Time line graph with collection and formation counts### #Total number of tetrapod collections (filtered by unique collection numbers) tp_collection_counts <- tetrapods %>% filter(!is.na(substage_assignment)) %>% filter(accepted_rank == "species") %>% distinct(collection_no, .keep_all = T) %>% count(substage_assignment) tp_collection_counts <- tp_collection_counts[match(substages, tp_collection_counts$substage_assignment),] tp_collection_counts$midpoints <- submidpoints tp_collection_counts$n[is.na(tp_collection_counts$n)] <- 0 #Total number of formations (filtered by unique formation names) tp_formation_counts <- tetrapods %>% filter(!is.na(substage_assignment)) %>% filter(accepted_rank == "species") %>% group_by(substage_assignment) %>% summarize(n = n_distinct(formation)) tp_formation_counts <- tp_formation_counts[match(substages, tp_formation_counts$substage_assignment),] tp_formation_counts$midpoints <- submidpoints tp_formation_counts$n[is.na(tp_formation_counts$n)] <- 0 #Read total P-T collections and formations summary tables collection_counts <- read_csv("data/P-T collection counts.csv") colnames(collection_counts)[1] <- "substage_assignment" collection_counts <- collection_counts[match(substages, collection_counts$substage_assignment),] formation_counts <- read_csv("data/P-T formation counts.csv") colnames(formation_counts)[1] <- "substage_assignment" formation_counts <- formation_counts[match(substages, formation_counts$substage_assignment),] #Combine into a single data frame tp_collection_counts$type <- "Tetrapod-bearing collections" tp_formation_counts$type <- "Tetrapod-bearing formations" collection_counts$type <- "Total collections" formation_counts$type <- "Total formations" data_counts <- rbind(tp_collection_counts, tp_formation_counts, collection_counts, formation_counts) ggplot(data_counts, aes(x = midpoints, y = log(n), group = type, colour = type)) + geom_line(size = 2) + scale_x_reverse(limits = c(260, 227)) + scale_y_continuous (position = "right") + labs(x = "Ma", y = "log(Count)") + geom_vline(aes(xintercept = 259.8), colour = "grey") + #Start of Wuchiapingian geom_vline(aes(xintercept = 254.14), colour = "grey") + #Changhsingian geom_vline(aes(xintercept = 252.17), colour = "grey") + #Induan geom_vline(aes(xintercept = 252.17), linetype = "longdash", colour = "red") + #PT geom_vline(aes(xintercept = 251.2), colour = "grey") + #Olenekian geom_vline(aes(xintercept = 247.2), colour = "grey") + #Anisian geom_vline(aes(xintercept = 242), colour = "grey") + #Ladinian geom_vline(aes(xintercept = 237), colour = "grey") + #Carnian geom_vline(aes(xintercept = 227), colour = "grey") + #End of Carnian scale_colour_manual(values = c("cyan1", "red1", "cyan4", "red4")) + theme_classic() + theme(legend.title=element_blank()) ########## Raw occurrences through time graph (Figure S1) ########## #setwd("#####") #Load packages library(tidyverse) #Create a vector giving the chronological order of stages stages <- c("Wuchiapingian", "Changhsingian", "Induan", "Olenekian", "Anisian", "Ladinian", "Carnian") #Create a vector of stage midpoints midpoints <- c(256.6, 253, 251.6, 249.2, 244.6, 239.5, 232) #Create a vector giving the chronological order of substages substages <- c("Wuchiapingian", "Changhsingian", "Griesbachian", "Dienerian", "Smithian", "Spathian", "Aegean-Bithynian", "Pelsonian-Illyrian", "Fassanian", "Longobardian", "Julian", "Tuvalian") #Create a vector of substage midpoints submidpoints <- c(256.6, 253, 251.7, 251.4, 250.2, 248.2, 245.9, 243.3, 240.8, 238.3, 234.5, 229.5) #Read in dataset tetrapods <- read_csv("data/tetrapod_database_final.csv") glimpse(tetrapods) #Wrangling #Check no incorrect spelling #unique(tetrapods$stage_assignment) #unique(tetrapods$substage_assignment) #Remove synonymy repeats (combinations of the same collection no. AND accepted name) tetrapods <- distinct(tetrapods, accepted_name, collection_no, .keep_all = T) #Remove occurrences without substage-level temporal resolution tetrapods <- filter(tetrapods, !is.na(substage_assignment)) ###Time line graph comparing fossil types/ecology occurrences### #Trace fossil species trace_counts <- tetrapods %>% filter(pres_mode == "trace") %>% count(substage_assignment) trace_counts <- trace_counts[match(substages, trace_counts$substage_assignment),] trace_counts$midpoints <- submidpoints trace_counts$n[is.na(trace_counts$n)] <- 0 #Terrestrial fossil species terres_counts <- tetrapods %>% filter(pres_mode == "body") %>% filter(taxon_environment == "terrestrial") %>% count(substage_assignment) terres_counts <- terres_counts[match(substages, terres_counts$substage_assignment),] terres_counts$midpoints <- submidpoints terres_counts$n[is.na(terres_counts$n)] <- 0 #Marine fossil species marine_counts <- tetrapods %>% filter(pres_mode == "body") %>% filter(taxon_environment == "marine") %>% count(substage_assignment) marine_counts <- marine_counts[match(substages, marine_counts$substage_assignment),] marine_counts$midpoints <- submidpoints marine_counts$n[is.na(marine_counts$n)] <- 0 #Combine into a single data frame trace_counts$type <- "Number of ichnospecies" terres_counts$type <- "Number of terrestrial species" marine_counts$type <- "Number of marine species" type_counts <- rbind(trace_counts, terres_counts, marine_counts) #Plot with geom_line ggplot(type_counts, aes(x = midpoints, y = n, group = type, colour = type)) + geom_line(size = 2) + scale_x_reverse(limits = c(260, 227)) + labs(x = "Ma", y = "Raw count") + geom_vline(aes(xintercept = 259.8), colour = "grey") + #Start of Wuchiapingian geom_vline(aes(xintercept = 254.14), colour = "grey") + #Changhsingian geom_vline(aes(xintercept = 252.17), colour = "grey") + #Induan geom_vline(aes(xintercept = 252.17), linetype = "longdash", colour = "red") + #PT geom_vline(aes(xintercept = 251.2), colour = "grey") + #Olenekian geom_vline(aes(xintercept = 247.2), colour = "grey") + #Anisian geom_vline(aes(xintercept = 242), colour = "grey") + #Ladinian geom_vline(aes(xintercept = 237), colour = "grey") + #Carnian geom_vline(aes(xintercept = 227), colour = "grey") + #End of Carnian scale_colour_manual(values = c("orange", "blue", "limegreen")) + theme_classic() + theme(legend.title=element_blank()) ########## Squares diversity through time graph (Figure S1) ########## #setwd("#####") #Load packages library(tidyverse) #Create a vector giving the chronological order of substages substages <- c("Wuchiapingian", "Changhsingian", "Griesbachian", "Smithian", "Spathian", "Aegean-Bithynian", "Pelsonian-Illyrian", "Fassanian", "Longobardian", "Julian", "Tuvalian") #Create a vector of substage midpoints submidpoints <- c(256.6, 253, 251.7, 250.2, 248.2, 245.9, 243.3, 240.8, 238.3, 234.5, 229.5) #Add Dienerian if needed, 251.4 #Read in dataset tetrapods <- read_csv("data/tetrapod_database_final.csv") glimpse(tetrapods) #Remove synonymy repeats (combinations of the same collection no. AND accepted name) tetrapods <- distinct(tetrapods, accepted_name, collection_no, .keep_all = T) #Trim data without substage assignment or species-level identification tetrapods <- tetrapods %>% filter(!is.na(substage_assignment)) #Create an empty dataset with PBDB column names to collect unique occurrences in unique_by_substage <- tetrapods[FALSE,] #Loop through each substage #For that substage, retain species occurrences, then retain unique taxa at gradually # higher taxonomic levels which are not already represented in that substage for (i in 1:(length(substages))) { print(i) one_substage <- tetrapods %>% filter(substage_assignment == substages[i]) for (j in 1:(nrow(one_substage))) { if (one_substage$accepted_rank[j] == "species") unique_by_substage <- rbind(unique_by_substage, one_substage[j,]) else if (!is.na(one_substage$genus[j])) (if (one_substage$genus[j] %in% one_substage$genus[-j] == F) unique_by_substage <- rbind(unique_by_substage, one_substage[j,])) else if (!is.na(one_substage$family[j])) (if (one_substage$family[j] %in% one_substage$family[-j] == F) unique_by_substage <- rbind(unique_by_substage, one_substage[j,])) else if (!is.na(one_substage$order[j])) (if (one_substage$order[j] %in% one_substage$order[-j] == F) unique_by_substage <- rbind(unique_by_substage, one_substage[j,])) } } ###Squares - terrestrial### #Filter terrestrial fossils terrestrial <- unique_by_substage %>% filter(pres_mode == "body") %>% filter(taxon_environment == "terrestrial") #Generate list of frequencies by substage #Achieved by using and trimming the 'count' function in dplyr, across a loop of substage names terres_substage_freq <- list() for (k in 1:length(substages)) { t_list <- terrestrial %>% filter(substage_assignment == substages[k]) %>% count(., accepted_name) %>% arrange(desc(n)) %>% select(n) t_list <- unlist(t_list, use.names = F) terres_substage_freq[[k]] <- t_list } names(terres_substage_freq) <- substages glimpse(terres_substage_freq) #Estimate diversity using squares method (Alroy, 2018) t_squares_list <- vector("numeric", length = 0) for(i in 1:length(terres_substage_freq)) { t_freq_list <- terres_substage_freq[[i]] if(is.na(t_freq_list[1])){t_freq_list <- 0} if(t_freq_list[1] == 0){t_squares <- 0} else { t_sp_count <- length(t_freq_list) t_sing_count <- sum(t_freq_list == 1) t_ind_count <- sum(t_freq_list) t_sum_nsq <- sum(t_freq_list^2) t_squares <- t_sp_count + (((t_sing_count^2)*t_sum_nsq)/((t_ind_count^2) - (t_sing_count*t_sp_count))) if(t_squares == Inf){t_squares <- length(t_freq_list)} } t_squares_list <- append(t_squares_list, t_squares) } t_to_plot <- data.frame(substages, squares = t_squares_list, submidpoints) ###Squares - marine### #Filter marine fossils marine <- unique_by_substage %>% filter(pres_mode == "body") %>% filter(taxon_environment == "marine") #Generate list of frequencies by substage #Achieved by using and trimming the 'count' function in dplyr, across a loop of substage names marine_substage_freq <- list() for (i in 1:length(substages)) { m_list <- marine %>% filter(substage_assignment == substages[i]) %>% count(., accepted_name) %>% arrange(desc(n)) %>% select(n) m_list <- unlist(m_list, use.names = F) marine_substage_freq[[i]] <- m_list } names(marine_substage_freq) <- substages glimpse(marine_substage_freq) #Estimate diversity using squares method (Alroy, 2018) m_squares_list <- vector("numeric", length = 0) for(i in 1:length(marine_substage_freq)) { m_freq_list <- marine_substage_freq[[i]] if(is.na(m_freq_list[1])){m_freq_list <- 0} if(m_freq_list[1] == 0){m_squares <- 0} else { m_sp_count <- length(m_freq_list) m_sing_count <- sum(m_freq_list == 1) m_ind_count <- sum(m_freq_list) m_sum_nsq <- sum(m_freq_list^2) m_squares <- m_sp_count + (((m_sing_count^2)*m_sum_nsq)/((m_ind_count^2) - (m_sing_count*m_sp_count))) if(m_squares == Inf){m_squares <- length(m_freq_list)} } m_squares_list <- append(m_squares_list, m_squares) } m_to_plot <- data.frame(substages, squares = m_squares_list, submidpoints) #Plot t_to_plot$type <- "Terrestrial" m_to_plot$type <- "Marine" all_to_plot <- rbind(t_to_plot, m_to_plot) ggplot(all_to_plot, aes(x = submidpoints, y = squares, group = type, colour = type)) + geom_line(size = 1) + geom_point() + scale_colour_manual(values = c("blue", "limegreen")) + scale_x_reverse(limits = c(260, 227)) + labs(x = "Ma", y = "Squares diversity") + geom_vline(aes(xintercept = 259.8), colour = "grey") + #Start of Wuchiapingian geom_vline(aes(xintercept = 254.14), colour = "grey") + #Changhsingian geom_vline(aes(xintercept = 252.17), colour = "grey") + #Induan geom_vline(aes(xintercept = 252.17), linetype = "longdash", colour = "red") + #PT geom_vline(aes(xintercept = 251.2), colour = "grey") + #Olenekian geom_vline(aes(xintercept = 247.2), colour = "grey") + #Anisian geom_vline(aes(xintercept = 242), colour = "grey") + #Ladinian geom_vline(aes(xintercept = 237), colour = "grey") + #Carnian geom_vline(aes(xintercept = 227), colour = "grey") + #End of Carnian theme_classic() ########## SQS diversity through time graph (Figure S1) ########## #setwd("#####") #Load packages library(tidyverse) library(iNEXT) #Create a vector giving the chronological order of substages substages <- c("Wuchiapingian", "Changhsingian", "Griesbachian", "Smithian", "Spathian", "Aegean-Bithynian", "Pelsonian-Illyrian", "Fassanian", "Longobardian", "Julian", "Tuvalian") #Create a vector of substage midpoints submidpoints <- c(256.6, 253, 251.7, 250.2, 248.2, 245.9, 243.3, 240.8, 238.3, 234.5, 229.5) #Add Dienerian if needed, 251.4 #Read in dataset tetrapods <- read_csv("data/tetrapod_database_final.csv") glimpse(tetrapods) #Remove synonymy repeats (combinations of the same collection no. AND accepted name) tetrapods <- distinct(tetrapods, accepted_name, collection_no, .keep_all = T) #Trim data without substage assignment or species-level identification tetrapods <- tetrapods %>% filter(!is.na(substage_assignment)) #Create an empty dataset with PBDB column names to collect unique occurrences in unique_by_substage <- tetrapods[FALSE,] #Loop through each substage #For that substage, retain species occurrences, then retain unique taxa at gradually # higher taxonomic levels which are not already represented in that substage for (i in 1:(length(substages))) { print(i) one_substage <- tetrapods %>% filter(substage_assignment == substages[i]) for (j in 1:(nrow(one_substage))) { if (one_substage$accepted_rank[j] == "species") unique_by_substage <- rbind(unique_by_substage, one_substage[j,]) else if (!is.na(one_substage$genus[j])) (if (one_substage$genus[j] %in% one_substage$genus[-j] == F) unique_by_substage <- rbind(unique_by_substage, one_substage[j,])) else if (!is.na(one_substage$family[j])) (if (one_substage$family[j] %in% one_substage$family[-j] == F) unique_by_substage <- rbind(unique_by_substage, one_substage[j,])) else if (!is.na(one_substage$order[j])) (if (one_substage$order[j] %in% one_substage$order[-j] == F) unique_by_substage <- rbind(unique_by_substage, one_substage[j,])) } } ###SQS - terrestrial### #Filter terrestrial fossils terrestrial <- unique_by_substage %>% filter(pres_mode == "body") %>% filter(taxon_environment == "terrestrial") #Generate a table of sample sizes by stage, ordered chronologically terres_totals <- count(terrestrial, substage_assignment) #terres_totals <- group_by(terrestrial, substage_assignment) %>% summarise(coll_count = n_distinct(collection_no)) terres_totals <- terres_totals[match(substages, terres_totals$substage_assignment),] #Generate list of frequencies by substage, starting with total value, as needed by iNEXT #Achieved by using and trimming the 'count' function in dplyr, across a loop of substage names terres_substage_freq <- list() for (i in 1:length(substages)) { t_list <- terrestrial %>% filter(substage_assignment == substages[i]) %>% count(., accepted_name) %>% arrange(desc(n)) %>% add_row(n = sum(.$n), .before = 1) %>% select(n) t_list <- unlist(t_list, use.names = F) if(t_list[1] < 3){t_list <- NA} terres_substage_freq[[i]] <- t_list } names(terres_substage_freq) <- substages glimpse(terres_substage_freq) #Filter out empty lists terres_substage_freq <- terres_substage_freq[!is.na(terres_substage_freq)] ###Code from Dunne et al. 2018### #"1 Coverage-standardised diversity curve" #Set quorum levels quorum_levels <- round(seq(from = 0.1, to = 0.9, by = 0.1), 1) #Estimate D using estimateD in iNEXT t.estD_list <- list() for(i in 1:length(quorum_levels)) { t.estD <- estimateD(terres_substage_freq, datatype = "incidence_freq", base = "coverage", level = quorum_levels[i]) t.estD <- filter(t.estD, order == 0) #filter for species richness values t.estD$quorum_level <- quorum_levels[i] t.estD$reference_t <- terres_totals$n #add sample sizes in extra column t.estD$midpoints <- submidpoints[match(names(terres_substage_freq), substages)] t.estD_list[[i]] <- t.estD } #Bind the individual dataframes into one t.estD_list <- bind_rows(t.estD_list) #Remove values when t is more than three times the sample size t.estD_list[which(t.estD_list$t >= 3 * t.estD_list$reference_t), c("qD", "qD.LCL", "qD.UCL")] <- rep(NA, 3) View(t.estD_list) #Make quorum level a factor t.estD_list$quorum_level <- as.factor(t.estD_list$quorum_level) #Plot quorum levels 0.4 - 0.7 ggplot(filter(t.estD_list, quorum_level %in% quorum_levels[4:7]), aes(x = midpoints, y = qD, ymin = qD.LCL, ymax = qD.UCL, group = quorum_level, colour = quorum_level)) + geom_line(size = 1) + geom_point() + geom_linerange() + scale_colour_manual(values = c("chartreuse1", "chartreuse2", "chartreuse3", "chartreuse4")) + scale_x_reverse(limits = c(260, 227)) + labs(x = "Ma", y = "Subsampled diversity", colour = "Quorum level") + geom_vline(aes(xintercept = 259.8), colour = "grey") + #Start of Wuchiapingian geom_vline(aes(xintercept = 254.14), colour = "grey") + #Changhsingian geom_vline(aes(xintercept = 252.17), colour = "grey") + #Induan geom_vline(aes(xintercept = 252.17), linetype = "longdash", colour = "red") + #PT geom_vline(aes(xintercept = 251.2), colour = "grey") + #Olenekian geom_vline(aes(xintercept = 247.2), colour = "grey") + #Anisian geom_vline(aes(xintercept = 242), colour = "grey") + #Ladinian geom_vline(aes(xintercept = 237), colour = "grey") + #Carnian geom_vline(aes(xintercept = 227), colour = "grey") + #End of Carnian theme_classic() ###SQS - marine### #Filter marine fossils marine <- unique_by_substage %>% filter(pres_mode == "body") %>% filter(taxon_environment == "marine") #Generate a table of sample sizes by stage, ordered chronologically marine_totals <- count(marine, substage_assignment) marine_totals <- marine_totals[match(substages, marine_totals$substage_assignment),] marine_totals <- filter(marine_totals, !is.na(n)) #Generate list of frequencies by substage, starting with total value, as needed by iNEXT #Achieved by using and trimming the 'count' function in dplyr, across a loop of substage names marine_substage_freq <- list() for (i in 1:length(substages)) { m_list <- marine %>% filter(substage_assignment == substages[i]) %>% count(., accepted_name) %>% arrange(desc(n)) %>% add_row(n = sum(.$n), .before = 1) %>% select(n) m_list <- unlist(m_list, use.names = F) if(m_list[1] < 3){m_list <- NA} marine_substage_freq[[i]] <- m_list } names(marine_substage_freq) <- substages glimpse(marine_substage_freq) #Filter out empty lists marine_substage_freq <- marine_substage_freq[!is.na(marine_substage_freq)] ###Code from Dunne et al. 2018### #"1 Coverage-standardised diversity curve" #Set quorum levels quorum_levels <- round(seq(from = 0.1, to = 0.9, by = 0.1), 1) #Estimate D using estimateD in iNEXT m.estD_list <- list() for(i in 1:length(quorum_levels)) { m.estD <- estimateD(marine_substage_freq, datatype = "incidence_freq", base = "coverage", level = quorum_levels[i]) m.estD <- filter(m.estD, order == 0) #filter for species richness values m.estD$quorum_level <- quorum_levels[i] m.estD$reference_t <- marine_totals$n #add sample sizes in extra column m.estD$midpoints <- submidpoints[match(names(marine_substage_freq), substages)] m.estD_list[[i]] <- m.estD } #Bind the individual dataframes into one m.estD_list <- bind_rows(m.estD_list) #Remove values when t is more than three times the sample size m.estD_list[which(m.estD_list$t >= 3 * m.estD_list$reference_t), c("qD", "qD.LCL", "qD.UCL")] <- rep(NA, 3) View(m.estD_list) #Make quorum level a factor m.estD_list$quorum_level <- as.factor(m.estD_list$quorum_level) #Plot quorum levels 0.4 - 0.7 ggplot(filter(m.estD_list, quorum_level %in% quorum_levels[4:7]), aes(x = midpoints, y = qD, ymin = qD.LCL, ymax = qD.UCL, group = quorum_level, colour = quorum_level)) + geom_line(size = 1) + geom_point() + geom_linerange() + scale_colour_manual(values = c("dodgerblue1", "dodgerblue2", "dodgerblue3", "dodgerblue4")) + scale_x_reverse(limits = c(260, 227)) + scale_y_continuous (position = "right") + labs(x = "Ma", y = "Subsampled diversity", colour = "Quorum level") + geom_vline(aes(xintercept = 259.8), colour = "grey") + #Start of Wuchiapingian geom_vline(aes(xintercept = 254.14), colour = "grey") + #Changhsingian geom_vline(aes(xintercept = 252.17), colour = "grey") + #Induan geom_vline(aes(xintercept = 252.17), linetype = "longdash", colour = "red") + #PT geom_vline(aes(xintercept = 251.2), colour = "grey") + #Olenekian geom_vline(aes(xintercept = 247.2), colour = "grey") + #Anisian geom_vline(aes(xintercept = 242), colour = "grey") + #Ladinian geom_vline(aes(xintercept = 237), colour = "grey") + #Carnian geom_vline(aes(xintercept = 227), colour = "grey") + #End of Carnian theme_classic() ########## Rarefaction curves (Figure S3) ########## #setwd("#####") #Load packages library(tidyverse) library(iNEXT) #Read in dataset tetrapods <- read_csv("data/tetrapod_database_final.csv") glimpse(tetrapods) #Remove synonymy repeats (combinations of the same collection no. AND accepted name) tetrapods <- distinct(tetrapods, accepted_name, collection_no, .keep_all = T) #Trim data without stage assignment tetrapods_trim <- tetrapods %>% filter(!is.na(stage_assignment)) #Trim to body fossils tetrapods_trim <- filter(tetrapods_trim, pres_mode == "body") #Add column which indicates latitude bin bins <- seq(from = -90, to = 90, by = 20) labels <- seq(from = -80, to = 80, by = 20) tetrapods_trim <- mutate(tetrapods_trim, paleolat_code = cut(Latitude, breaks = bins, labels = labels)) #Filter data, choose stage(s) and environment stage_tetrapods <- tetrapods_trim %>% filter(stage_assignment == "Anisian" | stage_assignment == "Ladinian") %>% filter(taxon_environment == "marine") #Filter unique taxa #Create an empty dataset with PBDB column names to collect unique occurrences in unique_by_bin <- stage_tetrapods[FALSE,] #Loop through each palaeolatitude bin #For that bin, retain species occurrences, then retain unique taxa at gradually # higher taxonomic levels which are not already represented in that bin for (i in 1:(length(labels))) { print(i) one_bin <- stage_tetrapods %>% filter(paleolat_code == labels[i]) for (j in 1:(nrow(one_bin))) { if (nrow(one_bin) != 0) if (one_bin$accepted_rank[j] == "species") unique_by_bin <- rbind(unique_by_bin, one_bin[j,]) else if (!is.na(one_bin$genus[j])) (if (one_bin$genus[j] %in% one_bin$genus[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) else if (!is.na(one_bin$family[j])) (if (one_bin$family[j] %in% one_bin$family[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) else if (!is.na(one_bin$order[j])) (if (one_bin$order[j] %in% one_bin$order[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) } } #Generate a table of sample sizes by latitude bin, ordered numerically, and remove NAs stage_totals <- count(unique_by_bin, paleolat_code) for(i in 1:length(labels)) { if((labels[i] %in% stage_totals$paleolat_code) == FALSE) stage_totals <- rbind(stage_totals, c(labels[i], 1)) } #Reorder for ease of checking stage_totals <- arrange(stage_totals, paleolat_code) stage_totals_list <- pull(stage_totals, n) #Generate list of frequencies by stage, starting with total value, as needed by iNEXT #Achieved by using and trimming the 'count' function in dplyr, across a loop of stage names lat_freq <- list() for (i in 1:length(labels)) { lat_list <- stage_tetrapods %>% filter(paleolat_code == labels[i]) %>% count(., accepted_name) %>% arrange(desc(n)) %>% add_row(n = sum(.$n), .before = 1) %>% select(n) lat_list <- unlist(lat_list, use.names = F) if(lat_list[1] < 3){lat_list <- NA} lat_freq[[i]] <- lat_list } names(lat_freq) <- labels glimpse(lat_freq) #Filter out empty lists lat_freq <- lat_freq[!is.na(lat_freq)] #Run iNEXT function inc.data <- iNEXT(lat_freq, q = 0, datatype = "incidence_freq", knots = 100) #Extract rarefaction points cov_rare <- inc.data$iNextEst for(i in 1:length(cov_rare)) { cov_rare[[i]]$stage_int <- names(cov_rare)[i] } cov_rare <- do.call(rbind, cov_rare) %>% tbl_df() #convert to tibble for ease of plotting #Plot ggplot(data = cov_rare, aes(x = SC, y = qD, ymin = qD.LCL, ymax = qD.UCL, fill = stage_int, colour = stage_int, lty = method)) + geom_line(size = 1) + scale_linetype_manual(values=c("dotted", "solid", "longdash")) + labs(x = "Coverage", y = "Species richness") + theme_classic() ########## Bootstraps (Figure S4) ########## #setwd("#####") #Load packages library(tidyverse) #Read in dataset tetrapods <- read_csv("data/tetrapod_database_final.csv") glimpse(tetrapods) #Remove synonymy repeats (combinations of the same collection no. AND accepted name) tetrapods <- distinct(tetrapods, accepted_name, collection_no, .keep_all = T) #Trim data without stage assignment tetrapods_trim <- tetrapods %>% filter(!is.na(stage_assignment)) #Add column which indicates latitude bin bins <- seq(from = -90, to = 90, by = 20) labels <- seq(from = -80, to = 80, by = 20) tetrapods_trim <- mutate(tetrapods_trim, paleolat_code = cut(Latitude, breaks = bins, labels = labels)) #Choose filters as desired tetrapods_trim <- tetrapods_trim %>% filter(pres_mode == "body") %>% filter(taxon_environment == "marine") %>% filter(stage_assignment == "Induan" | stage_assignment == "Olenekian") collections <- unique(tetrapods_trim$collection_no) #Create frames to store sampled collection numbers and the matching curve for each iteration sample_list <- data.frame() gradients <- data.frame() #Indicate number of bootstraps and reps required -> 250 for terrestrial, 30 for marine, 100 reps of each bootstraps <- 30 reps <- 100 for (x in 1:reps){ print(x) #Create subsampled bootstrap of 100 occurrences (with replacement) colls_to_sample <- base::sample(collections, bootstraps, replace = T) subset <- tetrapods_trim[FALSE,] for (k in 1:bootstraps){ one_coll <- filter(tetrapods_trim, collection_no == colls_to_sample[k]) subset <- rbind(subset, one_coll) } #Filter unique taxa #Create an empty dataset with PBDB column names to collect unique occurrences in unique_by_bin <- tetrapods_trim[FALSE,] #Loop through each collection #For that collection, retain species occurrences, then retain unique taxa at gradually # higher taxonomic levels which are not already represented in that collection # i.e. if there is an indeterminate dicynodont but no occurrences in that collection which # are dicynodonts but more specifically identified, the occurrence is retained for (i in 1:(length(labels))) { one_bin <- subset %>% filter(paleolat_code == labels[i]) for (j in 1:(nrow(one_bin))) { if (nrow(one_bin) != 0) if (one_bin$accepted_rank[j] == "species") unique_by_bin <- rbind(unique_by_bin, one_bin[j,]) else if (!is.na(one_bin$genus[j])) (if (one_bin$genus[j] %in% one_bin$genus[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) else if (!is.na(one_bin$family[j])) (if (one_bin$family[j] %in% one_bin$family[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) else if (!is.na(one_bin$order[j])) (if (one_bin$order[j] %in% one_bin$order[-j] == F) unique_by_bin <- rbind(unique_by_bin, one_bin[j,])) } } #Remove repeats of species names, to get one occurrence per unique species per substage unique_by_bin <- distinct(unique_by_bin, accepted_name, paleolat_code, .keep_all = T) #Generate counts raw_curve <- count(unique_by_bin, paleolat_code) #Fill in gaps for(m in 1:length(labels)) { if((labels[m] %in% raw_curve$paleolat_code) == FALSE) raw_curve <- rbind(raw_curve, c(labels[m], 0)) } #Reorder for ease of checking raw_curve <- arrange(raw_curve, desc(paleolat_code)) raw_curve$paleolat_code <- as.numeric(as.character(raw_curve$paleolat_code)) #Add collection sample and curve for each iteration to a list sample_list <- rbind(sample_list, colls_to_sample) gradients <- rbind(gradients, raw_curve$n) } names(sample_list) <- 1:bootstraps names(gradients) <- sort(labels, decreasing = T) #reorder labels to match order in output write.csv(sample_list, file = "data/Bootstrap sample list.csv") #write csv of sampled collections write.csv(gradients, file = "data/Bootstrap gradients.csv") #write csv of gradients produced for each bootstrap #sample_list <- read_csv("data/Bootstrap sample list.csv") #gradients <- read_csv("data/Bootstrap gradients.csv") #gradients <- gradients[,-1] #Generate summary statistics means <- colMeans(gradients) stand_devs <- apply(gradients, 2, sd) error <- qnorm(0.95)*(stand_devs/sqrt(reps)) lower <- means - error upper <- means + error #Create data frame containing summary statistics ready for plotting to_plot <- cbind(labels = sort(labels, decreasing = T), means, lower, upper) to_plot <- data.frame(to_plot) #Designate colour, limegreen or blue colour <- "limegreen" #Plot mean raw richness across bootstraps, with 95% confidence intervals ggplot(to_plot, aes(x = labels, y = means, ymin = lower, ymax = upper)) + geom_line(size = 1, col = colour) + geom_point(size = 2, col = colour) + geom_linerange(col = colour) + labs(x = "Palaeolatitude", y = "Raw species richness count") + coord_flip() + scale_x_continuous(limits = c(-90, 90)) + geom_vline(aes(xintercept = 0), colour = "black") + scale_colour_manual(values = "green") + theme_classic() ########## Alpha diversity (Table S2) ########## #setwd("#####") #Load packages library(tidyverse) #Read in dataset tetrapods <- read_csv("data/tetrapod_database_final.csv") glimpse(tetrapods) #Remove synonymy repeats (combinations of the same collection no. AND accepted name) tetrapods <- distinct(tetrapods, accepted_name, collection_no, .keep_all = T) #Trim data without stage assignment tetrapods_trim <- tetrapods %>% filter(!is.na(stage_assignment)) #Add column which indicates latitude bin bins <- seq(from = -90, to = 90, by = 20) labels <- seq(from = -80, to = 80, by = 20) tetrapods_trim <- mutate(tetrapods_trim, paleolat_code = cut(Latitude, breaks = bins, labels = labels)) #Filter unique taxa #Create an empty dataset with PBDB column names to collect unique occurrences in new_dataset <- tetrapods_trim[FALSE,] #Create a vector of unique collection numbers collections <- unique(tetrapods_trim$collection_no) #Loop through each collection #For that collection, retain species occurrences, then retain unique taxa at gradually # higher taxonomic levels which are not already represented in that collection # i.e. if there is an indeterminate dicynodont but no occurrences in that collection which # are dicynodonts but more specifically identified, the occurrence is retained for (i in 1:(length(collections))) { print(i) one_coll <- tetrapods_trim %>% filter(collection_no == collections[i]) for (j in 1:(nrow(one_coll))) { if (one_coll$accepted_rank[j] == "species") new_dataset <- rbind(new_dataset, one_coll[j,]) else if (!is.na(one_coll$genus[j])) (if (one_coll$genus[j] %in% one_coll$genus[-j] == F) new_dataset <- rbind(new_dataset, one_coll[j,])) else if (!is.na(one_coll$family[j])) (if (one_coll$family[j] %in% one_coll$family[-j] == F) new_dataset <- rbind(new_dataset, one_coll[j,])) else if (!is.na(one_coll$order[j])) (if (one_coll$order[j] %in% one_coll$order[-j] == F) new_dataset <- rbind(new_dataset, one_coll[j,])) } } #Filter as desired subset <- new_dataset %>% filter(pres_mode == "body") %>% filter(taxon_environment == "terrestrial") %>% filter(stage_assignment == "Wuchiapingian") #Create string of frequencies per collection, and generate a mean across it (choose your palaeolatitude bin) count_string <- subset %>% filter(paleolat_code == -60) %>% count(collection_no) %>% pull(n) mean(count_string) ########## Grid cell occupation (Table S3) ########## #setwd("#####") #Load packages library(tidyverse) library(raster) #Be wary that some function names are used in both packages - notation is explicit in places to # reduce the chance of error #Read in dataset tetrapods <- read_csv("data/tetrapod_database_final.csv") glimpse(tetrapods) #Remove synonymy repeats (combinations of the same collection no. AND accepted name) tetrapods <- distinct(tetrapods, accepted_name, collection_no, .keep_all = T) #Trim data without stage assignment tetrapods_trim <- tetrapods %>% filter(!is.na(stage_assignment)) #Add column which indicates latitude bin bins <- seq(from = -90, to = 90, by = 20) labels <- seq(from = -80, to = 80, by = 20) tetrapods_trim <- mutate(tetrapods_trim, paleolat_code = cut(Latitude, breaks = bins, labels = labels)) #Choose filters as desired tetrapods_trim <- tetrapods_trim %>% filter(pres_mode == "body") %>% filter(taxon_environment == "terrestrial") %>% filter(stage_assignment == "Induan" | stage_assignment == "Olenekian") #Filter to unique collection numbers tetrapods_trim <- distinct(tetrapods_trim, collection_no, .keep_all = T) #Create grid raster with one degree cells (rather than equal area) x_raster <- raster(ncol=360, nrow=180, xmn=-180, xmx=180, ymn=-90, ymx=90) x_raster <- setValues(x_raster, c(1:ncell(x_raster))) #Create empty string to add values to cell_count <- vector(mode = "numeric", length = 0) for (i in 1:length(labels)){ #Filter to bin bin_colls <- tetrapods_trim %>% filter(paleolat_code == labels[i]) #Pull coordinates into a spatial points data frame bin_colls <- dplyr::select(bin_colls, collection_no, Longitude, Latitude) if (nrow(bin_colls) > 0) points <- SpatialPointsDataFrame(bin_colls[,c("Longitude", "Latitude")], bin_colls) else points <- NA #Calculate occupancy occupancy <- raster::extract(x_raster, points) if (nrow(bin_colls) > 0) no_occ <- length(unique(occupancy)) else no_occ <- 0 #Append to vector cell_count <- append(cell_count, no_occ) } #Label the cell count vector rbind(labels, cell_count)