--- title: "Sticky_Fish" author: "Sarah Roberts" date: "8/21/2020" output: html_document --- #README This file does the following: 1. Load packages and functions needed to run the code. Prepare the NEFSC bottom trawl dataset from the original format. The original data can be downloaded from oceanadapt.rutgers.edu 2. Check stratum to select stratum that were consistently sampled 3. Select species that are present in a certain number of trawls 4. Run gams to determine how species CPUE are influenced by different environmental covariates. 5. Calculate shifts in geographic distributions such as biomass weighted mean centroids and range sizes. 6. load species classifications from literature 7. Combine the data 8. Create figures and perform two sided wilcoxon nonparametric tests You will need the following datasets for this to run. The ocean adapt datasets can be downloaded from https://oceanadapt.rutgers.edu/ FROM OCEAN ADAPT: "neus_Survdat.RData" - Northeast US bottom trawl survey data, in binary format (readable by R). Each line is a record for a species caught within a haul/tow on a cruise. These data have already had conversion factors applied and filtered for "good" tows (shg < 136). CRUISE6: six-digit cruise id (first four digits are the year) STATION: station number (within a cruise) STRATUM: statistical stratum SVSPP: species id. matches to SVSPP in neusSVSPP.RData file CATCHSEX: sex of the catch 0: unsexed 1: male 2: female 3-6? SVVESSEL: vessel id YEAR: year SEASON: season (FALL or SPRING) LAT: latitude in decimal degrees LON: longitude in decimal degrees DEPTH: depth in meters SURFTEMP: surface temperature in degrees Celsius SURFSALIN: surface salinity BOTTEMP: bottom temperature, deg C BOTSALIN: bottom salinity "neus_SVSPP.RData" - Northeast US bottom trawl species data SCINAME: scientific name SVSPP: species id COMNAME: common name AUTHOR: author for the species "neusStrata.csv" Area for each statistical stratum in the Aleutian Island, Eastern Bering Sea, Gulf of Alaska, and Northeast US surveys. Not all files have all fields. NPFMCArea: region name SubareaDescription: subarea name StratumCode: code for the stratum (matches STRATUM in ai, ebs, and goa files) OldStratumCode: stratum code from older classification system (only neus) DepthIntervalm: range of depths for the stratum, in meters Areakm2: area of the stratum in km2 Areanmi2: area in square nautical miles FROM AUTHORS WORK: "ocean_adapt_hab_vars.csv" - NEFSC lat long and time data combined with NAO and AMO data from Hurrel and TNC North Atlantic Marine Ecosystem Regional Assessment dataset in ArcGis. See methodology for details. "spp_depth_profiles.csv" - depth profiles collected from literature for each species as well as the information on species orders and types (i.e fish or invertebrate) #1 Prep ##1.1 packages Load the packages needed to complete the project. ```{r} library(tidyverse) #for data manipulation library(dplyr) #for data manipulation library(mgcv) #for running gams #spatial library(sp) #mean centroid library(spatialEco)#mean centroid library(geosphere)#mean centroid library(rgdal)#kernel density library(adehabitatHR)#kernel density library(ks)#kernel density library(sf)#kernel density library(raster) #kernel density projection #plotting library(ggplot2) library(ggpmisc) library(reshape) library(ggpubr) library(ggsci) library(scales) library(rnaturalearth) #gives you some nice shapefiles ``` ##1.2 functions These functions are used in file manipulation and have been adapted from Ocean Adapt code. ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) Mode <- function(x) { ux <- unique(x) ux[which.max(tabulate(match(x, ux)))] } # weighted mean for use with summarize(). values in col 1, weights in col 2 wgtmean = function(x, na.rm=FALSE) {questionr::wtd.mean(x=x[,1], weights=x[,2], na.rm=na.rm)} wgtse = function(x, na.rm=TRUE){ if(sum(!is.na(x[,1]) & !is.na(x[,2]))>1){ if(na.rm){ return(sqrt(wtd.var(x=x[,1], weights=x[,2], na.rm=TRUE, normwt=TRUE))/sqrt(sum(!is.na(x[,1] & !is.na(x[,2]))))) } else { return(sqrt(wtd.var(x=x[,1], weights=x[,2], na.rm=FALSE, normwt=TRUE))/sqrt(length(x))) # may choke on wtd.var without removing NAs } } else { return(NA) # NA if vector doesn't have at least 2 values } } se <- function(x) sd(x)/sqrt(length(x)) # assumes no NAs sumna <- function(x){ #acts like sum(na.rm=T) but returns NA if all are NA if(!all(is.na(x))) return(sum(x, na.rm=T)) if(all(is.na(x))) return(NA) } meanna = function(x){ if(!all(is.na(x))) return(mean(x, na.rm=T)) if(all(is.na(x))) return(NA) } ``` ##1.3 Load and clean data ```{r} # Compile NEUS =========================================================== print("Compile NEUS") load("neus_Survdat.RData") load("neus_SVSPP.RData") neus_survdat <- survdat %>% # select specific columns dplyr::select(CRUISE6, STATION, STRATUM, SVSPP, CATCHSEX, SVVESSEL, YEAR, SEASON, EST_TOWDATE, LAT, LON, DEPTH, SURFTEMP, SURFSALIN, BOTTEMP, BOTSALIN, ABUNDANCE, BIOMASS) %>% # remove duplicates distinct() %>% mutate(SVVESSEL = as.character(SVVESSEL), SEASON = as.character(SEASON)) # sum different sexes of same spp together #okay so now we know that the biomass column was given to them as the calibrated cpue neus_survdat <- neus_survdat %>% group_by(YEAR, SEASON, EST_TOWDATE, LAT, LON, DEPTH, CRUISE6, STATION, STRATUM, SVSPP) %>% summarise(wtcpue = sum(BIOMASS)) # repeat for spp file neus_spp <- spp %>% # remove some columns from spp data dplyr::select(-ITISSPP, -COMNAME, -AUTHOR) %>% mutate(SCINAME = as.character(SCINAME)) files <- as.list(dir(pattern = "neus_strata", path = "", full.names = T)) neus_strata <- read_csv(here::here("", "neus_strata.csv"), col_types = cols( STRGRP_DESC = col_character(), STRATUM = col_double(), STRATUM_NAME = col_character(), STRATUM_AREA = col_double(), MIDLAT = col_double(), MIDLON = col_double(), MINLAT = col_double(), MAXLAT = col_double(), MINLON = col_double(), MAXLON = col_double() )) %>% dplyr::select(STRATUM, STRATUM_AREA) %>% distinct() neus <- left_join(neus_survdat, neus_spp, by = "SVSPP") %>% left_join(neus_strata, by = "STRATUM") # are there any strata in the data that are not in the strata file? # stopifnot(nrow(filter(neus, is.na(STRATUM_AREA))) == 0) neus <- neus %>% mutate( # Create a unique haulid haulid = paste(formatC(CRUISE6, width=6, flag=0), formatC(STATION, width=3, flag=0), formatC(STRATUM, width=4, flag=0), sep='-'), # Calculate stratum area where needed (use convex hull approach) # convert square nautical miles to square kilometers stratumarea = STRATUM_AREA * 3.429904) %>% dplyr::rename(year = YEAR, date = EST_TOWDATE, spp = SCINAME, lat = LAT, lon = LON, depth = DEPTH, stratum = STRATUM) %>% filter( # remove unidentified spp and non-species spp != "" | !is.na(spp), !grepl("EGG", spp), !grepl("UNIDENTIFIED", spp)) %>% group_by(haulid, stratum, stratumarea, year, date, lat, lon, depth, spp, SEASON) %>% summarise(wtcpue = sumna(wtcpue)) %>% dplyr::select(haulid, year, date, lat, lon, stratum, stratumarea, depth, spp, wtcpue, SEASON) %>% ungroup() write.csv(neus, "neus_done_by_sarah.csv") ``` this takes about a minute ```{r} #create a haulid for the survey data as well. survdat <- survdat %>% mutate( # Create a unique haulid haulid = paste(formatC(CRUISE6, width=6, flag=0), formatC(STATION, width=3, flag=0), formatC(STRATUM, width=4, flag=0), sep='')) write.csv(survdat, "survey_data.csv") ``` ##1.4 Tidy data ```{r} ocean_adapt <- read.csv("neus_done_by_sarah.csv") ``` manipulate ocean adapt to be in a tidy format. ```{r} #this takes about a minute to run ocean_adapt <- ocean_adapt %>% group_by_at(vars(-wtcpue)) %>% # group by everything other than the value column. mutate(row_id=1:n()) %>% ungroup() %>% # build group index spread(key=spp, value=wtcpue) # spread ocean_adapt$haulid1 <- gsub(pattern = "-", replacement = "", x = ocean_adapt$haulid) #this takes about 20 minutes ocean_adapt_select <- ocean_adapt %>% dplyr::select(-SEASON, -date, -row_id) %>% group_by(haulid1) %>% replace(is.na(.), 0) %>% summarise_all(mean) ocean_adapt_info <- ocean_adapt %>% dplyr::select(haulid1, SEASON, date) #this also takes about 20 minutes ocean_adapt_info <- ocean_adapt_info %>% dplyr::group_by(haulid1) %>% summarise(SEASON= Mode(SEASON), DATE= Mode(date)) ocean_adapt_select1 <- left_join(ocean_adapt_select, ocean_adapt_info, by="haulid1") ocean_adapt_select1$haulid <- ocean_adapt_select1$haulid1 ocean_adapt_select1$date<- ocean_adapt_select1$DATE ocean_adapt_latlongdate <- ocean_adapt_select1 %>% dplyr::select(haulid, year, lat, lon, date) write.csv(ocean_adapt_latlongdate, "ocean_adapt_latlongdate.csv") #now go back into gis and get the habitat variables using a spatial join (make sure the projections match up for this to work.) total_full <- ocean_adapt_select1 ``` #2. Check stratum figure out stratum that were consistently sampled every year and every season every year. ```{r} lat_long_hab_vars <- read.csv("ocean_adapt_hab_vars.csv") #fix this lat_long_hab_vars$haulid <- as.character(lat_long_hab_vars$haulid) total_full <- left_join(total_full, lat_long_hab_vars, by="haulid") total_full <- total_full %>% dplyr::select(-ID, -GRIDCODE, -OBJECTID, -date.y, -lon.y, -lat.y, -year.y, -Field1, -JOIN_FID, -TARGET_FID, -Join_Count) names(total_full) <- gsub(".x", "", names(total_full)) names(total_full) <- gsub(".y", "", names(total_full)) #need to add bottom temperature and salinity data to it survey_data <- read.csv("survey_data.csv") survey_data <- survey_data %>% group_by(haulid) %>% summarise(bottemp = mean(BOTTEMP), surftemp = mean(SURFTEMP), botsalin = mean(BOTSALIN), surfsalin= mean(SURFSALIN)) survey_data$haulid <- as.character(survey_data$haulid) total_full <- left_join(total_full, survey_data, by="haulid") write.csv(total_full, "total_full.csv") #this is because they changed trawl doors to catch more pelagic species in 1985 total_full <- subset(total_full, total_full$year > 1985) total_full$season <- total_full$SEASON tot_fall <- subset(total_full, season=="FALL") tot_spring <- subset(total_full, season=="SPRING") fall_strat_m <- tot_fall %>% group_by(stratum) %>% summarise( years_fall = length(unique(year))) spring_strat_m <- tot_spring %>% group_by(stratum) %>% summarise( years_spring = length(unique(year))) tot_strat_m <- full_join(fall_strat_m, spring_strat_m, by=c("stratum")) #selecting out consistently sampled stratum for spring (32) and fall (32 years) datasets fall_strat_m_select <- subset(fall_strat_m, years_fall>=32) spring_strat_m_select <- subset(spring_strat_m, years_spring>=32) fall_stratum <- as.vector(fall_strat_m_select$stratum) spring_stratum <- as.vector(spring_strat_m_select$stratum) tot_fall <- tot_fall[tot_fall$stratum %in% fall_stratum,] tot_spring <- tot_spring[tot_spring$stratum %in% spring_stratum,] write.csv(tot_fall, "fall.csv") write.csv(tot_spring, "spring.csv") total <- rbind(tot_fall, tot_spring) write.csv(total, "total.csv") ``` Alrighty we should have spring and fall datasets with stratum that were sampled every year for each season #3. Selecting species Selecting based on species that were present in a certain number of years ```{r} #go back to original data load("neus_Survdat.RData") load("neus_SVSPP.RData") #which species are present in multiple years in both the spring and fall periods dat <- left_join(survdat, spp, by=c("SVSPP")) dat <- subset(dat, dat$YEAR >1985) all_fall <- subset(dat, SEASON == "FALL") all_spring <- subset(dat, SEASON == "SPRING") fall_y <- all_fall %>% group_by(SCINAME) %>% summarise( years_fall = length(unique(YEAR))) spring_y <- all_spring %>% group_by(SCINAME) %>% summarise( years_spring = length(unique(YEAR))) spp_by_year <- left_join(fall_y, spring_y, by=c("SCINAME")) #this is selecting species that were collected in the spring and fall for at least half of the dataset (16 out of 33 years) spp_by_year_selected <- subset(spp_by_year, spp_by_year$years_fall >15 & spp_by_year$years_spring >15) spp_by_year_selected <- left_join(spp_by_year_selected, spp, by=c("SCINAME")) write.csv(spp_by_year_selected, "spp_by_year_selected.csv") specnames <- as.vector(spp_by_year_selected$SCINAME) ``` specnames should have 151 species in it #4.Gams Run gams on each species (for spring and fall separately) to determine the deviance explained by each predictor variable. ```{r} library(mgcv) # create data frame to store results total <- read.csv("total.csv") colnames(total) <- gsub("[[:punct:]]$", "", colnames(total)) specnames <- read.csv("spp_by_year_selected.csv") #need to add a _ specnames$SCINAME <- gsub(" ", ".", specnames$SCINAME) specnames$SCINAME <- gsub("[[:punct:]]$", "", specnames$SCINAME) #for some reason it was putting punctuation at the end. get rid of that specnames <- specnames[1:151,] spp <- specnames specnames <- as.vector(specnames$SCINAME) #fall fall <- subset(total, total$season=="FALL") fall_gam_results <- data.frame() fall_gam_stand <- data.frame() fall_gam_dev <- data.frame(matrix(ncol=7, nrow=1)) colnames(fall_gam_dev) <- c("names", "dev.depth", "dev.bt", "dev.sst", "dev.bsal", "dev.ssal", "dev.sed") for (i in 1:length(specnames)) { #for each species in spec_list fit <- gam(as.formula(paste(specnames[i],"~ depth + bottemp + botsalin + SEDIMENT + surfsalin + botsalin")), data=fall) #standardized <- lm.beta(fit) fit_depth <- gam(as.formula(paste(specnames[i],"~ 1 + depth")), data=total) fit_bt <- gam(as.formula(paste(specnames[i],"~ bottemp")), data=total) fit_ssal <- gam(as.formula(paste(specnames[i],"~ surfsalin")), data=total) fit_sed <- gam(as.formula(paste(specnames[i],"~ SEDIMENT")), data=total) fit_bsal <- gam(as.formula(paste(specnames[i],"~ botsalin")), data=total) fit_sst <- gam(as.formula(paste(specnames[i],"~ surftemp")), data=total) dev.depth <- summary(fit_depth)$dev.expl dev.bt <- summary(fit_bt)$dev.expl dev.ssal <- summary(fit_ssal)$dev.expl dev.sst <- summary(fit_sst)$dev.expl dev.bsal <- summary(fit_bsal)$dev.expl dev.sed <- summary(fit_sed)$dev.expl dev_table <- list("null", dev.depth, dev.bt, dev.sst, dev.bsal, dev.ssal, dev.sed) dev_table <- as.data.frame(dev_table) colnames(dev_table) <- c("names", "dev.depth", "dev.bt", "dev.sst", "dev.bsal", "dev.ssal", "dev.sed") dev_table$names <- specnames[i] fall_gam_dev <- rbind(fall_gam_dev, dev_table) } #spring spring <- subset(total, total$season=="SPRING") spring_gam_results <- data.frame() spring_gam_stand <- data.frame() spring_gam_dev <- data.frame(matrix(ncol=7, nrow=1)) colnames(spring_gam_dev) <- c("names", "dev.depth", "dev.bt", "dev.sst", "dev.bsal", "dev.ssal", "dev.sed") for (i in 1:length(specnames)) { #for each species in spec_list fit <- gam(as.formula(paste(specnames[i],"~ depth + bottemp + botsalin + SEDIMENT + surfsalin + botsalin")), data=spring) #standardized <- lm.beta(fit) fit_depth <- gam(as.formula(paste(specnames[i],"~ 1 + depth")), data=total) fit_bt <- gam(as.formula(paste(specnames[i],"~ bottemp")), data=total) fit_ssal <- gam(as.formula(paste(specnames[i],"~ surfsalin")), data=total) fit_sed <- gam(as.formula(paste(specnames[i],"~ SEDIMENT")), data=total) fit_bsal <- gam(as.formula(paste(specnames[i],"~ botsalin")), data=total) fit_sst <- gam(as.formula(paste(specnames[i],"~ surftemp")), data=total) dev.depth <- summary(fit_depth)$dev.expl dev.bt <- summary(fit_bt)$dev.expl dev.ssal <- summary(fit_ssal)$dev.expl dev.sst <- summary(fit_sst)$dev.expl dev.bsal <- summary(fit_bsal)$dev.expl dev.sed <- summary(fit_sed)$dev.expl dev_table <- list("null", dev.depth, dev.bt, dev.sst, dev.bsal, dev.ssal, dev.sed) dev_table <- as.data.frame(dev_table) colnames(dev_table) <- c("names", "dev.depth", "dev.bt", "dev.sst", "dev.bsal", "dev.ssal", "dev.sed") dev_table$names <- specnames[i] spring_gam_dev <- rbind(spring_gam_dev, dev_table) } #combine with the common name data spring_gam_dev <- spring_gam_dev[-1,] spring_gam_dev <- left_join(spring_gam_dev, spp, by=c("names" = "SCINAME")) fall_gam_dev <- fall_gam_dev[-1,] fall_gam_dev <- left_join(fall_gam_dev, spp, by=c("names" = "SCINAME")) write.csv(fall_gam_dev, "fall_gam_dev.csv") write.csv(spring_gam_dev, "spring_gam_dev.csv") ``` #5. geographic calcs ##5.1 cent bio ```{r} # Calculate mean position through time for species ## Calculate mean latitude and depth of each species by year within each survey/region ### mean lat/lon/depth for each stratum ocean_adapt <- neus_done_by_sarah ocean_adapt_fall <- subset(ocean_adapt, ocean_adapt$SEASON=="FALL") dat_strat <- ocean_adapt_fall %>% dplyr::select(stratum, lat, lon, depth, stratumarea, haulid) %>% distinct(stratum, haulid, .keep_all = T) %>% group_by(stratum) %>% summarise(lat = meanna(lat), lon = meanna(lon), depth = meanna(depth), stratumarea = meanna(stratumarea)) ### mean wtcpue in each stratum/yr/spp (new code includes more lines because it ### includes rows that do not have a common name) dat_strat_yr <- ocean_adapt_fall %>% group_by(spp, stratum, year) %>% summarise(wtcpue = meanna(wtcpue)) # add stratum lat/lon/depth/area dat_strat_yr <- left_join(dat_strat_yr, dat_strat, by = c("stratum")) # index of biomass per stratum: mean wtcpue times area dat_strat_yr <- dat_strat_yr %>% mutate(wttot = wtcpue * stratumarea) # calculate mean lat cent_bio_lat <- dat_strat_yr %>% group_by(spp, year) %>% summarise(lat = questionr::wtd.mean(lat, wttot, na.rm = T)) # mean depth cent_bio_depth <- dat_strat_yr %>% group_by(spp, year) %>% summarise(depth = questionr::wtd.mean(depth, wttot, na.rm = T)) # mean lon cent_bio_lon <- dat_strat_yr %>% group_by(spp, year) %>% summarise(lon = questionr::wtd.mean(lon, wttot, na.rm = T)) # merge cent_bio <- left_join(cent_bio_lat, cent_bio_depth, by = c( "spp","year")) cent_bio <- left_join(cent_bio, cent_bio_lon, by = c("spp", "year")) # standard error for lat cent_bio_lat_se <- dat_strat_yr %>% group_by(spp, year) %>% summarise(lat_se = sqrt(questionr::wtd.var(lat, wttot, na.rm=TRUE, normwt=TRUE))/sqrt(sum(!is.na(lat) & !is.na(wttot)))) cent_bio <- left_join(cent_bio, cent_bio_lat_se, by = c("spp", "year")) cent_bio_depth_se <- dat_strat_yr %>% group_by(spp, year) %>% summarise(depth_se = sqrt(questionr::wtd.var(depth, wttot, na.rm=TRUE, normwt=TRUE))/sqrt(sum(!is.na(depth) & !is.na(wttot)))) cent_bio <- left_join(cent_bio, cent_bio_depth_se, by = c("spp", "year")) cent_bio_lon_se <- dat_strat_yr %>% group_by(spp, year) %>% summarise(lon_se = sqrt(questionr::wtd.var(lon, wttot, na.rm=TRUE, normwt=TRUE))/sqrt(sum(!is.na(lon) & !is.na(wttot)))) cent_bio <- left_join(cent_bio, cent_bio_lon_se, by = c("spp", "year")) BY_SPECIES_DATA <- cent_bio %>% ungroup() %>% arrange(spp, year) cent_bio_fall <- cent_bio write.csv(cent_bio_fall, "cent_bio_fall.csv") #spring ocean_adapt_spring <- subset(ocean_adapt, ocean_adapt$SEASON=="SPRING") dat_strat <- ocean_adapt_spring %>% dplyr::select(stratum, lat, lon, depth, stratumarea, haulid) %>% distinct(stratum, haulid, .keep_all = T) %>% group_by(stratum) %>% summarise(lat = meanna(lat), lon = meanna(lon), depth = meanna(depth), stratumarea = meanna(stratumarea)) ### mean wtcpue in each stratum/yr/spp (new code includes more lines because it ### includes rows that do not have a common name) dat_strat_yr <- ocean_adapt_spring %>% group_by(spp, stratum, year) %>% summarise(wtcpue = meanna(wtcpue)) # add stratum lat/lon/depth/area dat_strat_yr <- left_join(dat_strat_yr, dat_strat, by = c("stratum")) # index of biomass per stratum: mean wtcpue times area dat_strat_yr <- dat_strat_yr %>% mutate(wttot = wtcpue * stratumarea) # calculate mean lat cent_bio_lat <- dat_strat_yr %>% group_by(spp, year) %>% summarise(lat = questionr::wtd.mean(lat, wttot, na.rm = T)) # mean depth cent_bio_depth <- dat_strat_yr %>% group_by(spp, year) %>% summarise(depth = questionr::wtd.mean(depth, wttot, na.rm = T)) # mean lon cent_bio_lon <- dat_strat_yr %>% group_by(spp, year) %>% summarise(lon = questionr::wtd.mean(lon, wttot, na.rm = T)) # merge cent_bio <- left_join(cent_bio_lat, cent_bio_depth, by = c( "spp","year")) cent_bio <- left_join(cent_bio, cent_bio_lon, by = c("spp", "year")) # standard error for lat cent_bio_lat_se <- dat_strat_yr %>% group_by(spp, year) %>% summarise(lat_se = sqrt(questionr::wtd.var(lat, wttot, na.rm=TRUE, normwt=TRUE))/sqrt(sum(!is.na(lat) & !is.na(wttot)))) cent_bio <- left_join(cent_bio, cent_bio_lat_se, by = c("spp", "year")) cent_bio_depth_se <- dat_strat_yr %>% group_by(spp, year) %>% summarise(depth_se = sqrt(questionr::wtd.var(depth, wttot, na.rm=TRUE, normwt=TRUE))/sqrt(sum(!is.na(depth) & !is.na(wttot)))) cent_bio <- left_join(cent_bio, cent_bio_depth_se, by = c("spp", "year")) cent_bio_lon_se <- dat_strat_yr %>% group_by(spp, year) %>% summarise(lon_se = sqrt(questionr::wtd.var(lon, wttot, na.rm=TRUE, normwt=TRUE))/sqrt(sum(!is.na(lon) & !is.na(wttot)))) cent_bio <- left_join(cent_bio, cent_bio_lon_se, by = c("spp", "year")) BY_SPECIES_DATA <- cent_bio %>% ungroup() %>% arrange(spp, year) cent_bio_spring <- cent_bio write.csv(cent_bio_spring, "cent_bio_spring.csv") ``` ##5.2 distance shifted calculate the distance shifted from the first five years to the last five years. ```{r} cent_bio_spring <- as.data.frame(cent_bio_spring) cent_bio_spring$spp <- gsub(" ", ".", cent_bio_spring$spp) cent_bio_spring$spp <- gsub("[[:punct:]]$", "", cent_bio_spring$spp) #for some reason it was putting punctuation at the end. get rid of that cent_bio_spring <- cent_bio_spring[cent_bio_spring$spp %in% specnames,] cent_bio_spring[cent_bio_spring== "NaN"] = NA #spread the data to be by year and spp cent_bio_spring_lat <- cent_bio_spring %>% dplyr::select(spp, lat, year) %>% spread(key=year, value=lat) %>% group_by(spp) %>% summarise_all(mean) cent_bio_spring_lon <- cent_bio_spring %>% dplyr::select(spp, lon, year) %>% spread(key=year, value=lon) %>% group_by(spp) %>% summarise_all(mean) #make a first 5 years and last 5 years dataset cent_bio_spring_lon$`1986_1990` <- rowMeans(cent_bio_spring_lon[20:24], na.rm=T) cent_bio_spring_lon$`2014_2018` <- rowMeans(cent_bio_spring_lon[48:52], na.rm=T) cent_bio_spring_lat$`1986_1990` <- rowMeans(cent_bio_spring_lat[20:24], na.rm=T) cent_bio_spring_lat$`2014_2018` <- rowMeans(cent_bio_spring_lat[48:52], na.rm=T) spring_first_five <- cbind(cent_bio_spring_lon$`1986_1990`, cent_bio_spring_lat$`1986_1990`) spring_last_five <- cbind(cent_bio_spring_lon$`2014_2018`, cent_bio_spring_lat$`2014_2018`) spring_dist_5yr <- as.data.frame(distGeo(spring_first_five, spring_last_five)) #vector of distances in meters #NaN exists when the species wasnt present in any of those five years. If its 0 need to convert to na spring_dist_5yr$spp <- cent_bio_spring_lon$spp spring_dist_5yr[spring_dist_5yr== 0] = NA spring_dist_5yr[spring_dist_5yr== "NaN"] = NA #fall #subset only the species in specnames cent_bio_fall <- as.data.frame(cent_bio_fall) cent_bio_fall$spp <- gsub(" ", ".", cent_bio_fall$spp) cent_bio_fall$spp <- gsub("[[:punct:]]$", "", cent_bio_fall$spp) #for some reason it was putting punctuation at the end. get rid of that cent_bio_fall <- cent_bio_fall[cent_bio_fall$spp %in% specnames,] cent_bio_fall[cent_bio_fall== "NaN"] = NA #spread the data to be by year and spp cent_bio_fall_lat <- cent_bio_fall %>% dplyr::select(spp, lat, year) %>% spread(key=year, value=lat) %>% group_by(spp) %>% summarise_all(mean) cent_bio_fall_lon <- cent_bio_fall %>% dplyr::select(spp, lon, year) %>% spread(key=year, value=lon) %>% group_by(spp) %>% summarise_all(mean) #make a first 5 years and last 5 years dataset cent_bio_fall_lon$`1986_1990` <- rowMeans(cent_bio_fall_lon[25:29], na.rm=T) cent_bio_fall_lon$`2014_2018` <- rowMeans(cent_bio_fall_lon[53:57], na.rm=T) cent_bio_fall_lat$`1986_1990` <- rowMeans(cent_bio_fall_lat[25:29], na.rm=T) cent_bio_fall_lat$`2014_2018` <- rowMeans(cent_bio_fall_lat[53:57], na.rm=T) fall_first_five <- cbind(cent_bio_fall_lon$`1986_1990`, cent_bio_fall_lat$`1986_1990`) fall_last_five <- cbind(cent_bio_fall_lon$`2014_2018`, cent_bio_fall_lat$`2014_2018`) fall_dist_5yr <- as.data.frame(distGeo(fall_first_five, fall_last_five)) #vector of distances in meters #NaN exists when the species wasnt present in any of those five years. If its 0 need to convert to na fall_dist_5yr$spp <- cent_bio_fall_lon$spp fall_dist_5yr[fall_dist_5yr== 0] = NA fall_dist_5yr[fall_dist_5yr== "NaN"] = NA write.csv(fall_dist_5yr, "fall_dist_5yr.csv") write.csv(spring_dist_5yr, "spring_dist_5yr.csv") fall_dist_5yr_last5 <- cbind(fall_last_five, fall_dist_5yr) fall_dist_5yr_first5 <- cbind(fall_first_five, fall_dist_5yr) spring_dist_5yr_last5 <- cbind(spring_last_five, spring_dist_5yr) spring_dist_5yr_first5 <- cbind(spring_first_five, spring_dist_5yr) write.csv(fall_dist_5yr_last5, "fall_dist_5yr_last5.csv") write.csv(fall_dist_5yr_first5, "fall_dist_5yr_first5.csv") write.csv(spring_dist_5yr_last5, "spring_dist_5yr_last5.csv") write.csv(spring_dist_5yr_first5, "spring_dist_5yr_first5.csv") ``` ##5.3 kernel density biomass weighted kernel density. First make spatial objects ```{r} total <- read.csv("total.csv") fall <- read.csv("fall.csv") spring <- read.csv("spring.csv") names(fall)<-make.names(names(fall),unique = TRUE) names(spring)<-make.names(names(spring),unique = TRUE) #Lets select out the time period chunks we need. total_fall_sp_86_90 <- subset(fall, year < 1991) total_fall_sp_14_18 <- subset(fall, year > 2013) total_spring_sp_86_90 <- subset(spring, year < 1991) total_spring_sp_14_18 <- subset(spring, year > 2013) #total_sp_86_90 <- total_sp_86_90 %>% drop_na() coordinates(total_fall_sp_86_90) <- ~ lon + lat coordinates(total_spring_sp_86_90) <- ~ lon + lat coordinates(total_fall_sp_14_18) <- ~ lon + lat coordinates(total_spring_sp_14_18) <- ~ lon + lat ``` ###5.3.1 fall Then run kernel density calcs including biomass weighted area size and min and max latitude and longitude. ```{r} spec_vol_fall_86_90 <- data.frame(matrix(ncol=6, nrow=1)) colnames(spec_vol_fall_86_90) <- c("names", "vol", "xmin", "xmax", "ymin", "ymax") for (i in 1:length(specnames)) { tryCatch({#for each species in spec_list test_kd <- sp.kde(x = total_fall_sp_86_90, y = as.matrix(total_fall_sp_86_90@data[specnames[i]]), bw=5, nr=100, nc= 100, standardize = TRUE) projection(test_kd) <- "+proj=longlat +datum=WGS84" #test_kd_m <- projectRaster(test_kd, crs = "+proj=utm +zone=17 +ellps=GRS80 +datum=NAD83 +units=m +no_defs") #this is used with raster::area(test_kd) to find cell size in meters vol_rast <- calc(test_kd, fun=function(test_kd){ test_kd[test_kd < .95] <- NA; return(test_kd)} ) range <- trim(vol_rast) vol <- as.matrix(range@data@values) vol <- apply(vol, 2, function(x) length(which(!is.na(x)))) #cell size in degrees 0.1006667, 0.0895 # cell size in meters is 26317.956589, 24828.26971 vol <- vol* 8590*10000 #this is the 95% range in meters squared xmin <- range@extent@xmin xmax <- range@extent@xmax ymin <- range@extent@ymin ymax <- range@extent@ymax name <- specnames[i] table_1 <- list(name, vol, xmin, xmax, ymin, ymax) table_1 <- as.data.frame(table_1) colnames(table_1) <- c("names", "vol", "xmin", "xmax", "ymin", "ymax") spec_vol_fall_86_90 <- rbind(spec_vol_fall_86_90, table_1) }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")}) } #second spec_vol_fall_14_18 <- data.frame(matrix(ncol=6, nrow=1)) colnames(spec_vol_fall_14_18) <- c("names", "vol", "xmin", "xmax", "ymin", "ymax") for (i in 1:length(specnames)) { tryCatch({#for each species in spec_list test_kd <- sp.kde(x = total_fall_sp_14_18, y = as.matrix(total_fall_sp_14_18@data[specnames[i]]), bw=5, nr=100, nc= 100, standardize = TRUE) projection(test_kd) <- "+proj=longlat +datum=WGS84" #test_kd_m <- projectRaster(test_kd, crs = "+proj=utm +zone=17 +ellps=GRS80 +datum=NAD83 +units=m +no_defs") #this is used with raster::area(test_kd) to find cell size in meters vol_rast <- calc(test_kd, fun=function(test_kd){ test_kd[test_kd < .95] <- NA; return(test_kd)} ) range <- trim(vol_rast) vol <- as.matrix(range@data@values) vol <- apply(vol, 2, function(x) length(which(!is.na(x)))) #cell size in degrees 0.2394558, 0.22755102 # cell size in meters is 26317.956589, 24828.26971 vol <- vol* 8590*10000 #this is the 95% range in meters squared xmin <- range@extent@xmin xmax <- range@extent@xmax ymin <- range@extent@ymin ymax <- range@extent@ymax name <- specnames[i] table_1 <- list(name, vol, xmin, xmax, ymin, ymax) table_1 <- as.data.frame(table_1) colnames(table_1) <- c("names", "vol", "xmin", "xmax", "ymin", "ymax") spec_vol_fall_14_18 <- rbind(spec_vol_fall_14_18, table_1) }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")}) } write.csv(spec_vol_fall_86_90, file="kernel_fall_first_area.csv") write.csv(spec_vol_fall_14_18, file="kernel_fall_second_area.csv") ``` ###5.3.2 spring ```{r} spec_vol_spring_86_90 <- data.frame(matrix(ncol=6, nrow=1)) colnames(spec_vol_spring_86_90) <- c("names", "vol", "xmin", "xmax", "ymin", "ymax") for (i in 1:length(specnames)) { tryCatch({#for each species in spec_list test_kd <- sp.kde(x = total_spring_sp_86_90, y = as.matrix(total_spring_sp_86_90@data[specnames[i]]), bw=5, nr=100, nc= 100, standardize = TRUE) projection(test_kd) <- "+proj=longlat +datum=WGS84" #test_kd_m <- projectRaster(test_kd, crs = "+proj=utm +zone=17 +ellps=GRS80 +datum=NAD83 +units=m +no_defs") #this is used with raster::area(test_kd) to find cell size in meters vol_rast <- calc(test_kd, fun=function(test_kd){ test_kd[test_kd < .95] <- NA; return(test_kd)} ) range <- trim(vol_rast) vol <- as.matrix(range@data@values) vol <- apply(vol, 2, function(x) length(which(!is.na(x)))) #cell size in degrees 0.1006667, 0.0895 # cell size in meters is 26317.956589, 24828.26971 vol <- vol* 8590*10000 #this is the 95% range in meters squared xmin <- range@extent@xmin xmax <- range@extent@xmax ymin <- range@extent@ymin ymax <- range@extent@ymax name <- specnames[i] table_1 <- list(name, vol, xmin, xmax, ymin, ymax) table_1 <- as.data.frame(table_1) colnames(table_1) <- c("names", "vol", "xmin", "xmax", "ymin", "ymax") spec_vol_spring_86_90 <- rbind(spec_vol_spring_86_90, table_1) }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")}) } #second spec_vol_spring_14_18 <- data.frame(matrix(ncol=6, nrow=1)) colnames(spec_vol_spring_14_18) <- c("names", "vol", "xmin", "xmax", "ymin", "ymax") for (i in 1:length(specnames)) { tryCatch({#for each species in spec_list test_kd <- sp.kde(x = total_spring_sp_14_18, y = as.matrix(total_spring_sp_14_18@data[specnames[i]]), bw=5, nr=100, nc= 100, standardize = TRUE) projection(test_kd) <- "+proj=longlat +datum=WGS84" #test_kd_m <- projectRaster(test_kd, crs = "+proj=utm +zone=17 +ellps=GRS80 +datum=NAD83 +units=m +no_defs") #this is used with raster::area(test_kd) to find cell size in meters vol_rast <- calc(test_kd, fun=function(test_kd){ test_kd[test_kd < .95] <- NA; return(test_kd)} ) range <- trim(vol_rast) vol <- as.matrix(range@data@values) vol <- apply(vol, 2, function(x) length(which(!is.na(x)))) #cell size in degrees 0.2394558, 0.22755102 # cell size in meters is 26317.956589, 24828.26971 vol <- vol* 8590*10000 #this is the 95% range in meters squared xmin <- range@extent@xmin xmax <- range@extent@xmax ymin <- range@extent@ymin ymax <- range@extent@ymax name <- specnames[i] table_1 <- list(name, vol, xmin, xmax, ymin, ymax) table_1 <- as.data.frame(table_1) colnames(table_1) <- c("names", "vol", "xmin", "xmax", "ymin", "ymax") spec_vol_spring_14_18 <- rbind(spec_vol_spring_14_18, table_1) }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")}) } write.csv(spec_vol_spring_86_90, file="kernel_spring_first_area.csv") write.csv(spec_vol_spring_14_18, file="kernel_spring_second_area.csv") ``` ###5.3.4 simplify kernel data clean column names etc. ```{r} #add convex hull data spec_vol_spring_86_90 <- read.csv("kernel_spring_first_area.csv") spec_vol_spring_14_18 <- read.csv("kernel_spring_second_area.csv") spec_vol_fall_86_90 <- read.csv("kernel_fall_first_area.csv") spec_vol_fall_14_18 <- read.csv("kernel_fall_second_area.csv") convex_hull_spring <- inner_join(spec_vol_spring_86_90, spec_vol_spring_14_18, by="names") convex_hull_spring$area_diff_5yr <- convex_hull_spring$vol.y - convex_hull_spring$vol.x #positive means gain area convex_hull_spring$area_percchange_5yr <- (convex_hull_spring$vol.y - convex_hull_spring$vol.x) / convex_hull_spring$vol.x * 100 convex_hull_spring$south_lat_change <- convex_hull_spring$ymin.y - convex_hull_spring$ymin.x convex_hull_spring$north_lat_change <- convex_hull_spring$ymax.y - convex_hull_spring$ymax.x convex_hull_fall <- inner_join(spec_vol_fall_86_90, spec_vol_fall_14_18, by="names") convex_hull_fall$area_diff_5yr <- convex_hull_fall$vol.y - convex_hull_fall$vol.x #positive means gain area convex_hull_fall$area_percchange_5yr <- (convex_hull_fall$vol.y - convex_hull_fall$vol.x) / convex_hull_fall$vol.x * 100 convex_hull_fall$south_lat_change <- convex_hull_fall$ymin.y - convex_hull_fall$ymin.x convex_hull_fall$north_lat_change <- convex_hull_fall$ymax.y - convex_hull_fall$ymax.x convex_hull_spring <- convex_hull_spring %>% dplyr::rename(first_area = vol.x, second_area=vol.y) convex_hull_fall <- convex_hull_fall %>% dplyr::rename(first_area = vol.x, second_area=vol.y) write.csv(convex_hull_fall, file="convex_hull_fall.csv") write.csv(convex_hull_spring, file="convex_hull_spring.csv") ``` #6. species classifications I've gone through and classified all of the species according to the literature. That file is uploaded here and joined to the spp_by_year selected file ```{r} spp_class <- read.csv("spp_by_year_selected.csv") spp_class$SCINAME <- gsub(pattern = " ", replacement = ".", x = spp_class$SCINAME) spp_class$SCINAME <- gsub("[[:punct:]]$", "", spp_class$SCINAME) #for some reason it was putting punctuation at the end. get rid of that spp_prof <- read.csv("spp_depth_profiles.csv") spp_prof$SCINAME <- gsub("[[:punct:]]$", "", spp_prof$SCINAME) #for some reason it was putting punctuation at the end. get rid of that spp_class <- left_join(spp_class, spp_prof, by=c("SCINAME")) ``` #7. combining it all combine all of the data into one dataset ```{r} fall_gam_dev <- read.csv("fall_gam_dev.csv") spring_gam_dev <- read.csv("spring_gam_dev.csv") spring_dist_5yr <- read.csv("spring_dist_5yr.csv") fall_dist_5yr <- read.csv("fall_dist_5yr.csv") convex_hull_spring <- read.csv("convex_hull_spring.csv") convex_hull_fall <- read.csv("convex_hull_fall.csv") #fix strange punctuation at the end of some species spring_dist_5yr$spp <- gsub("[[:punct:]]$", "", spring_dist_5yr$spp) #for some reason it was putting punctuation at the end. get rid of that fall_dist_5yr$spp <- gsub("[[:punct:]]$", "", fall_dist_5yr$spp) #for some reason it was putting punctuation at the end. get rid of that #make large dataframes ##fall fall_dev_dist_spec <- left_join(fall_gam_dev, spp_class, by=c("names"="SCINAME")) fall_dev_dist_spec <- left_join(fall_dev_dist_spec, fall_dist_5yr, by=c("names"="spp")) fall_dev_dist_spec <- left_join(fall_dev_dist_spec, convex_hull_fall, by=c("names"="names")) fall_dev_dist_spec$dist_5yr <- fall_dev_dist_spec$distGeo.fall_first_five..fall_last_five. ##spring spring_dev_dist_spec <- left_join(spring_gam_dev, spp_class, by=c("names"="SCINAME")) spring_dev_dist_spec <- left_join(spring_dev_dist_spec, spring_dist_5yr, by=c("names"="spp")) spring_dev_dist_spec <- left_join(spring_dev_dist_spec, convex_hull_spring, by=c("names"="names")) spring_dev_dist_spec$dist_5yr <- spring_dev_dist_spec$distGeo.spring_first_five..spring_last_five. ``` Select species based on CPUE ( >= 20) in first five and last five years ```{r} load("neus_Survdat.RData") load("neus_SVSPP.RData") #which species are present in multiple years in both the spring and fall periods dat <- left_join(survdat, spp, by=c("SVSPP")) dat <- subset(dat, dat$YEAR >1985) dat$SCINAME <- gsub(pattern = " ", replacement = ".", x = dat$SCINAME) dat$SCINAME <- gsub("[[:punct:]]$", "", dat$SCINAME) #for some reason it was putting punctuation at the end. get rid of that all_fall <- subset(dat, SEASON == "FALL") all_fall$SCINAME <- gsub(" ", ".", all_fall$SCINAME) #all_fall <- subset(all_fall, all_fall$STRATUM %in% fall_stratum) spp_class <- read.csv("spp_by_year_selected.csv") spp_class$SCINAME <- gsub(pattern = " ", replacement = ".", x = spp_class$SCINAME) spp_class$SCINAME <- gsub("[[:punct:]]$", "", spp_class$SCINAME) #for some reason it was putting punctuation at the end. get rid of that spp_prof <- read.csv("spp_depth_profiles.csv") spp_prof$SCINAME <- gsub("[[:punct:]]$", "", spp_prof$SCINAME) #for some reason it was putting punctuation at the end. get rid of that spp_class <- left_join(spp_class, spp_prof, by=c("SCINAME")) spp_class <- subset(spp_class, spp_class$type=="fish") specnames <- as.vector(spp_class$SCINAME) all_fall <- all_fall[all_fall$SCINAME %in% specnames,] fall_pres <- as.data.frame(all_fall) %>% spread(SCINAME, ABUNDANCE) %>% group_by(YEAR) %>% summarise_at(vars (specnames), funs(sum), na.rm = TRUE) fall_pres <- rbind(fall_pres, colSums(fall_pres[1:5,])) fall_pres <- rbind(fall_pres, colSums(fall_pres[29:33,])) fall_select <- fall_pres[34:35,2:ncol(fall_pres)] fall_select <- as.data.frame(t(fall_select)) fall_select$spp <- rownames(fall_select) fall_select <- subset(fall_select, fall_select$V1 >19) fall_select <- subset(fall_select, fall_select$V2 >19) fall_spp_selected <- as.vector(fall_select$spp) #spring all_spring <- subset(dat, SEASON == "SPRING") all_spring$SCINAME <- gsub(" ", ".", all_spring$SCINAME) #all_spring <- subset(all_spring, all_spring$STRATUM %in% spring_stratum) all_spring <- all_spring[all_spring$SCINAME %in% specnames,] specnames_spring <- specnames[specnames != "FISTULARIA.PETIMBA"] spring_pres <- as.data.frame(all_spring) %>% spread(SCINAME, ABUNDANCE) %>% group_by(YEAR) %>% summarise_at(vars (specnames_spring), funs(sum), na.rm = TRUE) spring_pres <- rbind(spring_pres, colSums(spring_pres[1:5,])) spring_pres <- rbind(spring_pres, colSums(spring_pres[29:33,])) spring_select <- spring_pres[34:35,2:ncol(spring_pres)] spring_select <- as.data.frame(t(spring_select)) spring_select$spp <- rownames(spring_select) spring_select <- subset(spring_select, spring_select$V1 >19) spring_select <- subset(spring_select, spring_select$V2 >19) spring_spp_selected <- as.vector(spring_select$spp) ``` ```{r} #use only the selected species fall_dev_dist_spec <- fall_dev_dist_spec[fall_dev_dist_spec$names %in% fall_spp_selected,] spring_dev_dist_spec <- spring_dev_dist_spec[spring_dev_dist_spec$names %in% spring_spp_selected,] ##save files write.csv(spring_dev_dist_spec, file="spring_dev_dist_spec.csv") write.csv(fall_dev_dist_spec, file="fall_dev_dist_spec.csv") ``` #8 Figures ##8.1 FALL ### 8.1.2 set up melt find the most important predictor variable based on deviance explained, convert distance to km. ```{r} fall_dev_dist_spec <- fall_dev_dist_spec %>% dplyr::rename("depth" = dev.depth, "bottom temperature" = dev.bt, "surface temperature" = dev.sst, "bottom salinity"=dev.bsal, "surface salinity" = dev.ssal, "substrate"= dev.sed) #removing surface salinity and temperature fall_dev_dist_spec_strong <- fall_dev_dist_spec %>% dplyr::select("bottom temperature", "bottom salinity", "depth", "substrate") fall_dev_dist_spec_strong <- abs(fall_dev_dist_spec_strong) fall_dev_dist_spec_strong$strong <- colnames(fall_dev_dist_spec_strong)[max.col(fall_dev_dist_spec_strong,ties.method="first")] fall_dev_dist_spec$strong <- fall_dev_dist_spec_strong$strong #all distance #was in meters fall_dev_dist_spec$centroid_shift_km <- fall_dev_dist_spec$dist_5yr/1000 write.csv(fall_dev_dist_spec, "fall_dev_dist_spec.csv") ``` ###8.1.3 Load cleaned supplemental data Ive gone in and made cleaned supplemental tables for publication. They are the same as fall_dev_dist_spec, but with newer headings etc. We can make the figures from them. This chunk also melts data to be in a format for plotting. ```{r} fall_dev_dist_spec <- read.csv("Supplemental_table_fall.csv") colnames(fall_dev_dist_spec) <- gsub("[[:punct:]]", " ", colnames(fall_dev_dist_spec)) colnames(fall_dev_dist_spec) <- gsub(" ", " ", colnames(fall_dev_dist_spec)) colnames(fall_dev_dist_spec) <- gsub(" ", " ", colnames(fall_dev_dist_spec)) colnames(fall_dev_dist_spec) <- gsub(" $", "", colnames(fall_dev_dist_spec)) #melt data tomelt <- fall_dev_dist_spec %>% dplyr::select("scientific name", "Mean centroid shift km", "Range size change" , "Northern range extent latitude shift degrees", "Southern range extent latitude shift degrees") colnames(tomelt) <- c("scientific name", "Mean centroid shift (km)", "Percentage change in range size (%)", "Northern range shifted (latitude)", "Southern range shifted (latitude)") dat.m.f.2 <- melt(tomelt,id.vars='scientific name', measure.vars=c("Mean centroid shift (km)", "Percentage change in range size (%)", "Northern range shifted (latitude)", "Southern range shifted (latitude)")) data.m.f.2 <- left_join(fall_dev_dist_spec, dat.m.f.2, by=c("scientific name")) data.m.f.2 <- as.data.frame(data.m.f.2) ``` ###8.1.4 figs create figures ```{r} options(scipen = 99999) #figure 1 ##Distance shifted vs strongest predictor variable in the fall my_comparisons <- list(c("depth", "bottom temperature"), c("depth", "bottom salinity"), c("depth", "substrate"), c("bottom temperature", "bottom salinity"), c("bottom temperature", "substrate"), c("bottom salinity", "substrate")) ggplot(data=data.m.f.2, aes(x=`strongest predictor variable`, y=value, fill=`strongest predictor variable`)) + geom_boxplot(outlier.size=.5, fatten = 1) + stat_summary(fun.y=mean, colour="darkred", geom="point", shape=18, size=2,show_guide = FALSE) + labs(y = "", x = "Strongest predictor variable") + facet_wrap(~variable, scales = "free") + stat_compare_means(comparisons = my_comparisons, method="wilcox.test", label = "p.format", size=2.5) + labs(fill = "Species Type") + scale_x_discrete(labels = wrap_format(10)) + theme_classic(base_size=10) + theme(axis.title = element_text(size = rel(1.5)), axis.text = element_text(size = rel(1)), strip.text = element_text(size = rel(1)), panel.spacing = unit(1, "cm")) + theme(legend.position = "none") + scale_fill_simpsons() + scale_color_simpsons() + ggsave(filename= "Figure1.jpg", plot=last_plot(), width = 18, height=18, units=c("cm"), dpi=500) #figure 3 Pelagic species have shifted more than benthic species my_comparisons <- list( c("pelagic", "demersal"), c("pelagic", "benthic"), c("demersal", "benthic")) ggplot(data=data.m.f.2, aes(x=`Species type`, y=value, fill=`Species type`)) + geom_boxplot(outlier.size=.5, fatten = 1) + stat_summary(fun.y=mean, colour="darkred", geom="point", shape=18, size=2,show_guide = FALSE) + labs(y = "", x = "Species type") + stat_compare_means(comparisons = my_comparisons, method="wilcox.test", label = "p.format", size=2.5) + labs(fill = "Species Type") + facet_wrap(~variable, scales = "free") + theme_classic(base_size=10) + theme(axis.title = element_text(size = rel(1.5)), axis.text = element_text(size = rel(1)), strip.text = element_text(size = rel(1)), panel.spacing = unit(1, "cm")) + theme(legend.position = "none") + scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9")) + ggsave(filename= "Figure3.jpg", plot=last_plot(), width = 18, height=18, units=c("cm"), dpi=500) fall_dev_dist_spec$strong <- gsub(" ", "\n", fall_dev_dist_spec$strong) #Figure 2 strongest variable for each species group ggplot(data=subset(fall_dev_dist_spec, !is.na(`strongest predictor variable`)), aes(x= `strongest predictor variable`, group=`Species type`)) + geom_bar(aes(y = ..prop.., fill = factor(..x..)), stat="count") + geom_text(aes( label = scales::percent(..prop..), y= ..prop.. ), stat= "count", hjust=1, size=3) + labs(y = "Percent", x = "strongest variable", lab.nb.digits=1, color="variable", orientation = "horiz", lab.size = 7, lab.hjust=-1, xlim=c(0,100)) + facet_wrap(~`Species type`) + scale_y_continuous(labels = percent_format(accuracy = 1)) + coord_flip() + theme(legend.position = "none") + theme_classic(base_size=10) + theme(axis.title = element_text(size = rel(1.5)), axis.text = element_text(size = rel(1)), strip.text = element_text(size = rel(1))) + scale_fill_simpsons() + scale_color_simpsons() + theme(legend.position = "none") + ggsave(filename= "Figure2.jpg", plot=last_plot(), width = 18, height=9, units=c("cm"), dpi=500) ``` ##8.2 SPRING ###8.2.1 set up melt The below code does the exact same thing as above but for the spring ```{r} spring_dev_dist_spec <- spring_dev_dist_spec %>% dplyr::rename("depth" = dev.depth, "bottom temperature" = dev.bt, "surface temperature" = dev.sst, "bottom salinity"=dev.bsal, "surface salinity" = dev.ssal, "substrate"= dev.sed) #removing surface salinity and temperature spring_dev_dist_spec_strong <- spring_dev_dist_spec %>% dplyr::select("bottom temperature", "bottom salinity", "depth", "substrate") spring_dev_dist_spec_strong <- abs(spring_dev_dist_spec_strong) spring_dev_dist_spec_strong$strong <- colnames(spring_dev_dist_spec_strong)[max.col(spring_dev_dist_spec_strong,ties.method="first")] spring_dev_dist_spec$strong <- spring_dev_dist_spec_strong$strong #all distance spring_dev_dist_spec$centroid_shift_km <- spring_dev_dist_spec$dist_5yr/1000 #was in meters write.csv(spring_dev_dist_spec, "spring_dev_dist_spec.csv") ``` ###8.1.3 Load cleaned supplemental data ```{r} #Ive gone in and made a cleaned up the supplemental tables for publication. They are the same as spring_dev_dist_spec, but with newer headings etc. We can make the figures from that. spring_dev_dist_spec <- read.csv("Supplemental_table_spring.csv") colnames(spring_dev_dist_spec) <- gsub("[[:punct:]]", " ", colnames(spring_dev_dist_spec)) colnames(spring_dev_dist_spec) <- gsub(" ", " ", colnames(spring_dev_dist_spec)) colnames(spring_dev_dist_spec) <- gsub(" ", " ", colnames(spring_dev_dist_spec)) colnames(spring_dev_dist_spec) <- gsub(" $", "", colnames(spring_dev_dist_spec)) #melt data tomelt <- spring_dev_dist_spec %>% dplyr::select("scientific name", "Mean centroid shift km", "Range size change" , "Northern range extent latitude shift degrees", "Southern range extent latitude shift degrees") colnames(tomelt) <- c("scientific name", "Mean centroid shift (km)", "Percentage change in range size (%)", "Northern range shifted (latitude)", "Southern range shifted (latitude)") dat.m.s.2 <- melt(tomelt,id.vars='scientific name', measure.vars=c("Mean centroid shift (km)", "Percentage change in range size (%)", "Northern range shifted (latitude)", "Southern range shifted (latitude)")) data.m.s.2 <- left_join(spring_dev_dist_spec, dat.m.s.2, by=c("scientific name")) data.m.s.2 <- as.data.frame(data.m.s.2) ``` ###8.1.4 figs ```{r} options(scipen = 99999) #figure 1 Supplemental - Distance shifted vs strongest predictor variable in the spring my_comparisons <- list(c("depth", "bottom temperature"), c("depth", "bottom salinity"), c("depth", "substrate"), c("bottom temperature", "bottom salinity"), c("bottom temperature", "substrate"), c("bottom salinity", "substrate")) ggplot(data=data.m.s.2, aes(x=`strongest predictor variable`, y=value, fill=`strongest predictor variable`)) + geom_boxplot(outlier.size=.5, fatten = 1) + stat_summary(fun.y=mean, colour="darkred", geom="point", shape=18, size=2,show_guide = FALSE) + labs(y = "", x = "Strongest predictor variable") + facet_wrap(~variable, scales = "free") + stat_compare_means(comparisons = my_comparisons, method="wilcox.test", label = "p.format", size=2.5) + labs(fill = "Species Type") + scale_x_discrete(labels = wrap_format(10)) + theme_classic(base_size=10) + theme(axis.title = element_text(size = rel(1.5)), axis.text = element_text(size = rel(1)), strip.text = element_text(size = rel(1)), panel.spacing = unit(1, "cm")) + theme(legend.position = "none") + scale_fill_simpsons() + scale_color_simpsons() + ggsave(filename= "Figure1_supp.jpg", plot=last_plot(), width = 18, height=18, units=c("cm"), dpi=500) #figure 4 Supplemental - Pelagic species have shifted more than benthic species my_comparisons <- list( c("pelagic", "demersal"), c("pelagic", "benthic"), c("demersal", "benthic")) ggplot(data=data.m.s.2, aes(x=`Species type`, y=value, fill=`Species type`)) + geom_boxplot(outlier.size=.5, fatten = 1) + stat_summary(fun.y=mean, colour="darkred", geom="point", shape=18, size=2,show_guide = FALSE) + labs(y = "", x = "Species type") + stat_compare_means(comparisons = my_comparisons, method="wilcox.test", label = "p.format", size=2.5) + labs(fill = "Species Type") + facet_wrap(~variable, scales = "free") + theme_classic(base_size=10) + theme(axis.title = element_text(size = rel(1.5)), axis.text = element_text(size = rel(1)), strip.text = element_text(size = rel(1)), panel.spacing = unit(1, "cm")) + theme(legend.position = "none") + scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9")) + ggsave(filename= "Figure4_supp.jpg", plot=last_plot(), width = 18, height=18, units=c("cm"), dpi=500) spring_dev_dist_spec$strong <- gsub(" ", "\n", spring_dev_dist_spec$strong) #Figure 2 Supplemental - strongest variable for each species group in the spring ggplot(data=subset(spring_dev_dist_spec, !is.na(`strongest predictor variable`)), aes(x= `strongest predictor variable`, group=`Species type`)) + geom_bar(aes(y = ..prop.., fill = factor(..x..)), stat="count") + geom_text(aes( label = scales::percent(..prop..), y= ..prop.. ), stat= "count", hjust=1, size=3) + labs(y = "Percent", x = "strongest variable", lab.nb.digits=1, color="variable", orientation = "horiz", lab.size = 7, lab.hjust=-1, xlim=c(0,100)) + facet_wrap(~`Species type`) + scale_y_continuous(labels = percent_format(accuracy = 1)) + coord_flip() + theme(legend.position = "none") + theme_classic(base_size=10) + theme(axis.title = element_text(size = rel(1.5)), axis.text = element_text(size = rel(1)), strip.text = element_text(size = rel(1))) + scale_fill_simpsons() + scale_color_simpsons() + theme(legend.position = "none") + ggsave(filename= "Figure2_supp.jpg", plot=last_plot(), width = 18, height=9, units=c("cm"), dpi=500) ``` ##8.3 Figure 4 Map I originally created this figure in arcmap for layout and visualization capabilities, but it is also repeated below for reproducibility. ```{r} #map attempt world <- ne_countries(scale = "medium", returnclass = "sf") colnames(fall_dist_5yr_first5) <- c("Lon1", "Lat1", "dist1", "spp") colnames(fall_dist_5yr_last5) <- c("Lon2", "Lat2", "dist2", "spp") mapping_1 <- left_join(fall_dist_5yr_first5, fall_dist_5yr_last5, by = c("spp")) mapping_1 <- subset(mapping_1, mapping_1$spp == "HIPPOGLOSSOIDES.PLATESSOIDES" |mapping_1$spp == "GLYPTOCEPHALUS.CYNOGLOSSUS" | mapping_1$spp == "PSEUDOPLEURONECTES.AMERICANUS") mapping_2 <- left_join(fall_dist_5yr_first5, fall_dist_5yr_last5, by = c("spp")) mapping_2 <- subset(mapping_2, mapping_2$spp =="TRACHURUS.LATHAMI" |mapping_2$spp == "BREVOORTIA.TYRANNUS" | mapping_2$spp == "ETRUMEUS.TERES") ggplot(data = world) + geom_sf() + geom_point(data = mapping_1, aes(x = Lon1, y = Lat1, color = spp)) + geom_point(data = mapping_1, shape = 15, aes(x = Lon2, y = Lat2, color = spp)) + coord_sf(xlim=c(-85, -60), ylim=c(30,45), expand = TRUE) + theme(panel.background = element_rect(fill = "white", colour = "black")) ggplot(data = world) + geom_sf() + geom_point(data = mapping_2, aes(x = Lon1, y = Lat1, color = spp)) + geom_point(data = mapping_2, shape = 15, aes(x = Lon2, y = Lat2, color = spp)) + coord_sf(xlim=c(-85, -60), ylim=c(30,45), expand = TRUE) + theme(panel.background = element_rect(fill = "white", colour = "black")) ``` #9 count of each group ```{r} spring_dev_dist_spec %>% group_by(depth_profile) %>% summarise(count=n()) fall_dev_dist_spec %>% group_by(depth_profile) %>% summarise(count=n()) ```