#Code for manuscript "Current nursery offerings highlight the potential to expand native plant sales"
#Author: M.E. Fertakos
#Last Updated: September 29th, 2025
#Note: Please contact the corresponding author to inquire about the full dataset.

#load data----
modern<-read.csv(file="ModernOrnamentalsMS.csv", encoding="latin1") #Note: this file has placeholder nursery names and addresses removed. Contact the corresponding author to inquire about the full data. 
length(unique(paste(modern$NurseryName,modern$State))) #358 unique nurseries
#remove hybrids
modern <- modern %>%
  filter(!grepl(" x",AcceptedName)) 
modern <- modern %>%
  filter(!grepl("hybrid",AcceptedName))
length(unique(modern$AcceptedName)) #5983 remaining names
length(unique(paste(modern$NurseryName,modern$State))) #358 unique nurseries
modern <- modern %>%
  filter(!grepl("x Hesperotropsis leylandii",AcceptedName)) #hybrid found later (so 5982 remaining names)
#resolve taxonomy
#names to match
tnrs_data<-data.frame(AcceptedName=unique(modern$AcceptedName))
tnrs_data$ID <- seq.int(nrow(tnrs_data))

tnrs_data <- tnrs_data[,c(2,1)]
tnrs_data <- rename(tnrs_data, "Scientific Name" ="AcceptedName")

tnrs_results <- TNRS(tnrs_data, sources = c("wfo"), accuracy = 0.9)
tnrs_results <- tnrs_results[,c(2,34,41)]
colnames(tnrs_results)<-c("AcceptedName","TNRS_AcceptedName","TNRS_Family")

modern<-left_join(modern,tnrs_results,by="AcceptedName")
modern$TNRS_AcceptedName[modern$TNRS_AcceptedName==""]<-NA
length(unique(modern$AcceptedName[is.na(modern$TNRS_AcceptedName)])) #221 unique unresolved names
modern<-modern[!is.na(modern$TNRS_AcceptedName),] #removed 1900 records of 221 unresolvable names
modern <- modern %>%
  mutate(TNRS_AcceptedName = ifelse(grepl("^\\b\\w+\\b$", TNRS_AcceptedName), NA, TNRS_AcceptedName)) #Replace genus resolutions with NA so they can be removed
length(unique(modern$AcceptedName[is.na(modern$TNRS_AcceptedName)])) #534 unique genus only resolutions
modern<-modern[!is.na(modern$TNRS_AcceptedName),] #removed 2324 records of 534 species resolved to genus (mostly hybrids or interspecifics)
colnames(modern)[1]<-"OriginalName"
colnames(modern)[16]<-"AcceptedName"

#Cut to just genus/species (unless hybrid)
# Assuming df is your data frame and your_column is the column you want to modify
modern$AcceptedName <- sapply(strsplit(modern$AcceptedName, " "), function(x) {
  if (any(grepl("^x$", x))) {
    # If any element is exactly "x", keep the entire string
    paste(x, collapse = " ")
  } else {
    # Otherwise, keep only the first two words
    paste(x[1:min(2, length(x))], collapse = " ")
  }
})
length(unique(modern$AcceptedName)) #4665 unique species

#merge cultivar info that was completed by hand on a subset of the nurseries
cultivar_info<-read.csv('cultivar_species_check_ByHand_July25.csv')
cultivar_info<-cultivar_info[-c(7:8)]
modern<-left_join(modern,cultivar_info,by=c("NurseryName","State","Raw.Name..genus.species.","AcceptedName"))

#some species fixes found while recording cultivar names 
modern$AcceptedName[modern$AcceptedName=="Prunus avium" & modern$CultivarName=="autumnalis"]<-"Prunus subhirtella"
modern$AcceptedName[modern$AcceptedName=="Campanula fragilis" & modern$CultivarName=="bavarian blue"]<-"Campanula cochlearifolia"
modern$AcceptedName[modern$AcceptedName=="Viburnum davidii" & modern$CultivarName=="blue muffin"]<-"Viburnum dentatum"
modern$AcceptedName[modern$AcceptedName=="Rhododendron calendulaceum" & modern$CultivarName=="camilla's blush"]<-"Rhododendron canescens"
modern$AcceptedName[modern$AcceptedName=="Digitalis parviflora" & modern$CultivarName=="candy mountain"]<-"Digitalis purpurea"
modern$AcceptedName[modern$AcceptedName=="Digitalis parviflora" & modern$CultivarName=="candy mountain white"]<-"Digitalis purpurea"
modern$AcceptedName[modern$AcceptedName=="Viburnum carlesii" & modern$CultivarName=="cardinal candy"]<-"Viburnum dilatatum"
modern$AcceptedName[modern$AcceptedName=="Virburnum davidii" & modern$CultivarName=="chicago lustre"]<-"Viburnum dentatum"
modern$AcceptedName[modern$AcceptedName=="Virburnum davidii" & modern$CultivarName=="christom"]<-"Viburnum dentatum"
modern$AcceptedName[modern$AcceptedName=="Cornus sanguinea" & modern$CultivarName=="arctic fire"]<-"Cornus sericea"
modern$AcceptedName[modern$AcceptedName=="Acer palmatum" & modern$CultivarName=="first flame"]<-"Acer pseudosieboldianum"
modern$AcceptedName[modern$AcceptedName=="Neomirandea angularis"]<-"Eupatorium fistulosum"
modern$AcceptedName[modern$AcceptedName=="Liatris scariosa" & modern$CultivarName=="floristan white"]<-"Liatris spicata"
modern$AcceptedName[modern$AcceptedName=="Buxus sinica" & modern$CultivarName=="franklin's gem"]<-"Buxus microphylla"
modern$AcceptedName[modern$AcceptedName=="Solidago sempervirens" & modern$CultivarName=="golden fleece"]<-"Solidago sphacelata"
modern$AcceptedName[modern$AcceptedName=="Cornus alba" & modern$CultivarName=="golden shadows"]<-"Cornus alternifolia"
modern$AcceptedName[modern$AcceptedName=="Rhododendron maximum" & modern$CultivarName=="grandiflorum"]<-"Rhododendron catawbiense"
modern$AcceptedName[modern$AcceptedName=="Chamaecyparis thyoides" & modern$CultivarName=="hay wire"]<-"Chamaecyparis lawsoniana"
modern$CultivarName[modern$AcceptedName=="Cornus alba" & modern$CultivarName=="ivory halo"]<-"bailhalo"
modern$AcceptedName[modern$AcceptedName=="Betula pendula" & modern$CultivarName=="whitespire"]<-"Betula populifolia"
modern$AcceptedName[modern$AcceptedName=="Acer tataricum" & modern$CultivarName=="joe witt"]<-"Acer tegmentosum"
modern$AcceptedName[modern$AcceptedName=="Cornus sanguinea" & modern$CultivarName=="kelseyi"]<-"Cornus sericea"
modern$AcceptedName[modern$AcceptedName=="Syringa vulgaris" & modern$CultivarName=="miss kim"]<-"Syringa pubescens"
modern$AcceptedName[modern$AcceptedName=="Liatris scariosa" & modern$CultivarName=="kobold"]<-"Liatris spicata"
modern$AcceptedName[modern$AcceptedName=="Prunus avium" & modern$CultivarName=="kwanzan"]<-"Prunus serrulata"
modern$AcceptedName[modern$AcceptedName=="Sambucus canadensis" & modern$CultivarName=="eva"]<-"Sambucus nigra"
modern$AcceptedName[modern$AcceptedName=="Sambucus canadensis" & modern$CultivarName=="lemony lace"]<-"Sambucus racemosa"
modern$AcceptedName[modern$AcceptedName=="Stachys germanica" & modern$CultivarName=="fuzzy wuzzy"]<-"Stachys byzantina"
modern$AcceptedName[modern$AcceptedName=="Rhododendron calendulaceum" & modern$CultivarName=="lee's dark purple"]<-"Rhododendron catawbiense"
modern$AcceptedName[modern$AcceptedName=="Sambucus nigra" & modern$CultivarName=="lemony lace"]<-"Sambucus racemosa"
modern$AcceptedName[modern$AcceptedName=="Aquilegia caerulea" & modern$CultivarName=="little lanterns"]<-"Aquilegia canadensis"
modern$AcceptedName[modern$AcceptedName=="Hamamelis vernalis" & modern$CultivarName=="little prospect"]<-"Hamamelis virginiana"
modern$AcceptedName[modern$AcceptedName=="Crataegus phaenopyrum" & modern$CultivarName=="ohio pioneer"]<-"Crataegus punctata"
modern$AcceptedName[modern$AcceptedName=="Lavandula angustifolia" & modern$CultivarName=="phenomenal"]<-"Lavandula intermedia"
modern$AcceptedName[modern$AcceptedName=="Cornus sanguinea" & modern$CultivarName=="pucker up"]<-"Cornus sericea"
modern$AcceptedName[modern$AcceptedName=="Salvia leucantha" & modern$CultivarName=="purple knockout"]<-"Salvia lyrata"
modern$AcceptedName[modern$AcceptedName=="Cornus florida" & modern$CultivarName=="cardinal"]<-"Cornus sericea"
modern$AcceptedName[modern$AcceptedName=="Betula pendula" & modern$CultivarName=="renaissance oasis"]<-"Betula papyrifera"
modern$AcceptedName[modern$AcceptedName=="Phlox paniculata" & modern$CultivarName=="jeanne d'arc"]<-"Hibiscus syriacus"
modern$AcceptedName[modern$AcceptedName=="Prunus fruticosa" & modern$CultivarName=="santa rosa"]<-"Prunus salicina"
modern$AcceptedName[modern$AcceptedName=="Viburnum lantana" & modern$CultivarName=="shasta"]<-"Viburnum plicatum"
modern$AcceptedName[modern$AcceptedName=="Prunus fruticosa" & modern$CultivarName=="stanley"]<-"Prunus domestica"
modern$AcceptedName[modern$AcceptedName=="Prunus fruticosa" & modern$CultivarName=="stella"]<-"Prunus domestica"
modern$AcceptedName[modern$AcceptedName=="Pinus taeda" & modern$CultivarName=="thunderhead"]<-"Pinus thunbergii"
modern$AcceptedName[modern$AcceptedName=="Hydrangea macrophylla" & modern$CultivarName=="tuff stuff"]<-"Hydrangea serrata"
modern$AcceptedName[modern$AcceptedName=="Hydrangea macrophylla" & modern$CultivarName=="tuff stuff ah ha"]<-"Hydrangea serrata"
modern$AcceptedName[modern$AcceptedName=="Hydrangea macrophylla" & modern$CultivarName=="tuff stuff red"]<-"Hydrangea serrata"
modern$AcceptedName[modern$AcceptedName=="Dactylicapnos scandens" & modern$CultivarName=="valentine"]<-"Lamprocapnos spectabilis"
modern$AcceptedName[modern$OriginalName=="Verbena lanai"]<-"Verbena hybrida"



#add status from HPS 1.1 (combines data from USDA PLANTS, Will's Twenties Rule data, RIIS, and by hand searches)
#load in data
HPS<-read.csv(file='HPS1.1_taxa.csv',encoding = "UTF-8")
#select only columns of interest
HPS<-HPS %>%
  select(AcceptedName,Family,Habit,combo_status,US_regulated)%>%
  distinct()
#combine with modern
modern2<-left_join(modern,HPS,by="AcceptedName")
#remove hybrids from resolved names
modern2 <- modern2 %>%
  filter(!grepl(" x ",AcceptedName)) #removed 37 hybrid species
modern2 <- modern2 %>%
  filter(!grepl("hybrid",AcceptedName)) #remove 1 additional hybrid
length(unique(modern2$AcceptedName)) #4629 unique species
#remove unneeded columns
modern2<-modern2 %>%
  select(OriginalName,NurseryName,NurseryLocation,State,Raw.Name..genus.species.,CommonName,CatalogDate,Link.to.GoogleDoc,
         Type, NurseryType,Format,AcceptedName,TNRS_Family,Habit,combo_status,US_regulated,IsCultivar,CultivarName)
length(unique(modern2$AcceptedName[is.na(modern2$combo_status)])) #
#identify species with no data
NoData<-modern2 %>%
  filter(is.na(combo_status)) %>%
  select(AcceptedName,TNRS_Family,US_regulated) %>%
  distinct()

length(unique(paste(modern$NurseryName,modern$State))) #356 unique locations (2 nurseries removed with unresolvable names)
#add Habit from USDA PLANTS
#add USDA ID
USDAcodes<-read.table("USDA_plants_codes.txt",sep=",",header=T)
USDAcodes<-USDAcodes%>%
  select(Accepted.Symbol,Scientific.Name,Native.Status) %>%
  distinct()
colnames(USDAcodes)[2]<-"AcceptedName"
USDAcodes$Native.Status[USDAcodes$Native.Status==""]<-NA
#Append Accepted USDAcodes
NoData<-plyr::join(NoData,USDAcodes, by = "AcceptedName",type="left", match="first") #only takes the first match if there are more than 1 USDAcode name per accepted name
colnames(NoData)[4]<-"USDAcode"


growthhabits=read.csv("USDAgrowthhabitbyUSDAid.csv",header=TRUE)
#fix weird column name
#colnames(growthhabits)[1] <- gsub('^...','',colnames(growthhabits)[1])
as.data.frame(growthhabits)->growthhabits

NoData<-left_join(NoData,growthhabits,by="USDAcode")

#add combo status
#Add RIIS
sp_rarity<-read.csv('USGS_RIIS_2023_invasive.csv')
load('abundance_names_accepted2.rda') #file with TNRS resolved names
colnames(sp_rarity)[1]<-"SearchedName"
sp_rarity<-plyr::join(sp_rarity,tnrs_results_a, by = "SearchedName",type="left",match="first") #only takes the first match
sp_rarity$AcceptedName <- sapply(strsplit(sp_rarity$AcceptedName, " "), function(x) {
  if (any(grepl("^x$", x))) {
    # If any element is exactly "x", keep the entire string
    paste(x, collapse = " ")
  } else {
    # Otherwise, keep only the first two words
    paste(x[1:min(2, length(x))], collapse = " ")
  }
})
aggregated_abundance <- aggregate(. ~ AcceptedName, data = sp_rarity, FUN = function(x) { #aggregate duplicated names
  if (is.character(x)) {
    non_na_values <- unique(na.omit(x))
    if (length(non_na_values) > 0) {
      return(paste(unique(non_na_values), collapse = ", "))
    } else {
      return(NA)
    }
  } else if (is.numeric(x)) {
    return(sum(x, na.rm = TRUE))
  }
},na.action=NULL)
aggregated_abundance$degreeOfEstablishment<-gsub(".*invasive.*","invasive",aggregated_abundance$degreeOfEstablishment)
aggregated_abundance <- aggregated_abundance %>%
  select(AcceptedName,degreeOfEstablishment) %>%
  distinct()
NoData<-left_join(NoData,aggregated_abundance,by="AcceptedName")

#Add Twenties
twenties_rule<-read.csv("TwentiesRule_US.csv")
load('twenties_names_accepted2.rda') #file with TNRS resolved names
twenties_rule<-twenties_rule[-1]
colnames(twenties_rule)[1]<-"SearchedName"
twenties_rule<-plyr::join(twenties_rule,tnrs_results_t, by = "SearchedName",type="left",match="first") #only takes the first match
twenties_rule<-twenties_rule[-1]
twenties_rule$AcceptedName <- sapply(strsplit(twenties_rule$AcceptedName, " "), function(x) {
  if (any(grepl("^x$", x))) {
    # If any element is exactly "x", keep the entire string
    paste(x, collapse = " ")
  } else {
    # Otherwise, keep only the first two words
    paste(x[1:min(2, length(x))], collapse = " ")
  }
})
aggregated_twenties <- aggregate(. ~ AcceptedName, data = twenties_rule, FUN = function(x) {
  if (is.character(x)) {
    non_na_values <- unique(na.omit(x))
    if (length(non_na_values) > 0) {
      return(paste(unique(non_na_values), collapse = ", "))
    } else {
      return(NA)
    }
  } else if (is.numeric(x)) {
    return(sum(x, na.rm = TRUE))
  }
},na.action=NULL)
aggregated_twenties$US_status[which(aggregated_twenties$US_status=="established, invasive")]<-"invasive"
aggregated_twenties$US_status[which(aggregated_twenties$US_status=="invasive, established")]<-"invasive"
aggregated_twenties<-aggregated_twenties %>% select(AcceptedName,US_status) %>% distinct()
NoData<-left_join(NoData,aggregated_twenties,by="AcceptedName")

#update USDA status
NoData$Native.Status[grepl('L48\\(N\\)',NoData$Native.Status)]<-'N'
NoData$Native.Status[grepl('L48\\(I\\)',NoData$Native.Status)] <- 'I'
NoData$Native.Status[grepl('L48\\(I\\?\\)',NoData$Native.Status)] <- 'I'
NoData$Native.Status[grepl('L48\\(I,N\\)',NoData$Native.Status)] <- 'I/N'
NoData$Native.Status[grepl('NA\\(N\\)',NoData$Native.Status)] <- 'N' 
NoData$Native.Status[grepl('NA\\(I\\)',NoData$Native.Status)] <- 'I' 
NoData$Native.Status[grepl('L48\\(N\\?\\)',NoData$Native.Status)] <- 'N' 

NoData <- NoData %>%
  mutate(Native.Status = ifelse(Native.Status %in% c("I", "N", "I/N"), Native.Status, NA))

#add number of GBIF records
# myspecies<-unique(NoData$AcceptedName[is.na(NoData$combo_status)])## the species we want to check
# state_provinces<-c("Alabama; Arizona; Arkansas; California; Colorado; Connecticut; Delaware; Florida; Georgia; Idaho; Illinois; Indiana; Iowa; Kansas; Kentucky; Louisiana; Maine; Maryland; Massachusetts; Michigan; Minnesota; Mississippi; Missouri; Montana; Nebraska; Nevada; New Hampshire; New Jersey; New Mexico; New York; North Carolina; North Dakota; Ohio; Oklahoma; Oregon; Pennsylvania; Rhode Island; South Carolina; South Dakota; Tennessee; Texas; Utah; Vermont; Virginia; Washington; West Virginia; Wisconsin; Wyoming")
# 
# counterz <- data.frame(AcceptedName = character(0), count = integer(0), stringsAsFactors = FALSE) ## empty dataframe
# for (z in myspecies){ #loop over all species
#   count<-occ_count(scientificName=z,stateProvince=state_provinces)###for each species count number of entries
#   goober<-data.frame(AcceptedName=z,count=count, stringsAsFactors = FALSE) ### merge that into new dataframe with species name
#   counterz<-rbind(goober,counterz) ## rbind them so they dont overwrite
#   print(z) ## keep me updated
# }

#append by hand fixes
fixes<-read.csv(file="modern_GBIFcounts.csv",header=TRUE)
fixes<-fixes[-2]
#update habits
merged_df <- merge(NoData, fixes[, c("AcceptedName", "Habit")], by = "AcceptedName", all.x = TRUE)

# Use the Habit column from fixes to update the Habit column in NoData
merged_df$Habit.x[is.na(merged_df$Habit.x)] <- merged_df$Habit.y[is.na(merged_df$Habit.x)]

# Drop the Habit.y column and rename Habit.x to Habit
merged_df <- merged_df[, !names(merged_df) %in% "Habit.y"]
names(merged_df)[names(merged_df) == "Habit.x"] <- "Habit"

#add by hand status column
fixes<-fixes[-3]
merged_df<-left_join(merged_df,fixes,by="AcceptedName")
merged_df$Habit[merged_df$Habit==""]<-NA
merged_df$status[merged_df$status==""]<-NA

NoData<-merged_df

#create combo_status
NoData$combo_status<-NA
invasive_conditions <- NoData$US_status == "invasive" |
  NoData$degreeOfEstablishment == "invasive" |
  NoData$US_regulated == "Yes" |
  NoData$status=="invasive"

established_conditions <- NoData$US_status == "established" |
  NoData$degreeOfEstablishment == "established (category C3)" | 
  NoData$Native.Status == "I" | 
  NoData$status =="established"


native_conditions <- NoData$Native.Status == "N" | NoData$Native.Status == "I/N" | NoData$status=="native"

NoData$combo_status[established_conditions] <- "established"
NoData$combo_status[invasive_conditions] <- "invasive"
NoData$combo_status[native_conditions]<- "native"
NoData$combo_status[is.na(NoData$combo_status)] <- "introduced"  


#add NoData back to modern2
NoData2<-NoData %>%
  select(AcceptedName,Habit,combo_status,status)
# Merge modern2 with NoData2 based on the AcceptedName column
merged_modern2 <- merge(modern2, NoData2, by = "AcceptedName", all.x = TRUE, suffixes = c("", "_NoData2"))
# Update the Habit column in modern2 with values from NoData2 where applicable
merged_modern2$Habit[is.na(merged_modern2$Habit)] <- merged_modern2$Habit_NoData2[is.na(merged_modern2$Habit)]
# Update the combo_status column in modern2 with values from NoData2 where applicable
merged_modern2$combo_status[is.na(merged_modern2$combo_status)] <- merged_modern2$combo_status_NoData2[is.na(merged_modern2$combo_status)]
# Drop the columns with suffix "_NoData2"
merged_modern2 <- merged_modern2[, !names(merged_modern2) %in% c("Habit_NoData2", "combo_status_NoData2")] #down to 4629 species
length(unique(paste0(merged_modern2$NurseryName,merged_modern2$State))) #356 nurseries

#some more filtering  
merged_modern2$NurseryName[merged_modern2$NurseryName=="Log Cabin Perenniels"] <- "Log Cabin Perennials" #355 nurseries
merged_modern2$NurseryName[merged_modern2$NurseryName=="NorthCreek Nursery"] <- "North Creek Nursery" #354 nurseries
merged_modern2$NurseryName[merged_modern2$NurseryName=="Bartrams Garden"] <- "Bartram's Garden" #353 nurseries
merged_modern2$NurseryName[merged_modern2$NurseryName=="Pinelands Nursery & Supply"] <- "Pinelands Nursery" #352 nurseries
merged_modern2$NurseryName[merged_modern2$NurseryName=="Rutgers Landscape & Nursery"] <- "Rutgers Landscaping and Nursery" #352 nurseries
merged_modern2$NurseryName[merged_modern2$NurseryName=="Rutger's Landscaping and Nursery"] <- "Rutgers Landscaping and Nursery" #351 nurseries
merged_modern2$NurseryName[merged_modern2$NurseryName=="Willoway"] <- "Willoway Nurseries" #350 nurseries

#remove houseplants
merged_modern2$Type[is.na(merged_modern2$Type)]<-"Ornamental"
merged_modern2_backup<-merged_modern2
merged_modern2<-merged_modern2 %>%
  filter(Type=="Ornamental") #removes 707 houseplant species and 1 nursery (349 nurseries, 3922 species)
#fix a few nurseries
merged_modern2$State[merged_modern2$NurseryName=="New Moon Nursery"]<-"NJ" #348 nurseries
merged_modern2$State[merged_modern2$State=="OH "]<-"OH" #348 nurseries

#remove duplicate names/we have updated catalog as "NorthCreekNurseries"
merged_modern2 <- merged_modern2[merged_modern2$NurseryName != "North Creek Nurseries",] #removes 0 species, 347 nurseries
#remove FL outside scope
merged_modern2<-merged_modern2 %>%
  filter(!State=="FL") #removes 1 species (3921), 1 nursery (346 nurseries)
#remove North Carolina (only 2 nurseries and outside NECASC scope - 344 nurseries)
merged_modern2<-merged_modern2 %>%
  filter(!State=="NC") #removes 7 species (3914)
#remove Canadian nursery
merged_modern2<-merged_modern2 %>%
  filter(!NurseryName=="Bakker Nursery") #removes 1 species (3913) (343 nurseries)
#remove plant company not associated with specific location
merged_modern2<-merged_modern2 %>%
  filter(!NurseryName=="American Beauties Native Plants") #removes 1 species (3912)
merged_modern2<-merged_modern2 %>%
  filter(!NurseryName=="Izel Plants") #removes 60 species (3852)
merged_modern2<-merged_modern2 %>%
  filter(!NurseryName=="Meeting House Herb Farm") #removes 0 species , down to 340 nurseries
merged_modern2<-merged_modern2 %>%
  filter(!NurseryName=="Siteone Landscape Supply") #removes 517 species, down to 339 nurseries
#down to 3335 species
#remove hybrids identified by hand
merged_modern2 <- merged_modern2[merged_modern2$status != "hybrid" | is.na(merged_modern2$status), ] #removes 2 species (3333)

#Fix different addresses for same nursery
# nursery_locations<-merged_modern2 %>%
#   select(NurseryName,NurseryLocation,State) %>%
#   distinct()
# nursery_locations %>% group_by(NurseryName) %>% filter(n() > 1)
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Atlantic Nurseries"] <- "Deer Park Ave., Dix Hills, NY"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Millican Nursery"] <- "Pleasant St, Chichester, NH"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Willoway Nurseries"] <- "Center Rd., Avon, OH"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="North Creek Nursery"] <- "North Creek Road, Landenberg PA"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Archewild Native Nursery"] <- "Hillcrest Rd Quakertown, PA"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Northeast Pollinator Plants"] <- "191 Goose Pond Road, Fairfax, VT"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Chesterfield Nursery"] <- "Chesterfield Arneytown Rd, Chesterfield, NJ"
#Add more detail to some addresses
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Issima"] <- "Little Compton, Rhode Island"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Bowlings Nursery"] <- "2814 Todds Point Rd, Simpsonville, KY, United States, Kentucky"
merged_modern2$State[merged_modern2$NurseryName=="Bowlings Nursery"] <- "KY"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Oakland Nurseries"] <- "1156 Oakland Park Avenue Columbus, OH"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Davis Natives"] <- "Ashland, VA"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Pinelands Direct Native Plants"] <- "25346 Mount Pleasant Road, Columbus, New Jersey"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Bona Tera LLC"] <- "Friendship, MD"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Izel Plants"] <- "Friendship, MD"
merged_modern2$State[merged_modern2$NurseryName=="The Perennial Farm"] <- "MD"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="The Perennial Farm"] <- "12017 Glen Arm Road, Glen Arm, Maryland"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="The Natural Garden"] <- "Harrisonburg, VA"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Plane View Nursery"] <- "770 Wapping Rd Portsmouth, RI"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Swallowtail Gardens"] <- "266 Summit Spring Rd Poland, ME"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="VNPS Northern Neck Chapter Plant Sale"] <- "Kilmarnock, VA"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Yellow Spring Farms"] <- "1165 Yellow Springs Rd Chester Springs, PA"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Botanique"] <- "214 Pitcher Plant Lane, Stanardsville, VA"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Olson’s Greenhouse, Inc."] <- "Raynham, MA"
#fixing some addresses that did not geocode successfully
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Brock Farms Home & Garden"] <- "4189 US-9 Freehold, NJ 07728"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Burger Farm and Garden Center"] <- "7849 Main Street Newtown, OH"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Shady Oak Farm"] <- "20472 Doddtown Rd Harbeson, DE"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Griffin's Greenhouse"] <- "1619 Main St, Tewksbury, MA"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Pierson Nurseries"] <- "313 Waterhouse Rd, Dayton, ME 04005"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Walker Farm"] <- "1190 US-5, East Dummerston, VT 05346"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Rutgers Landscaping and Nursery"] <- "1051 US-202, Ringoes, NJ"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Scioto Gardens"] <- "2870 Curve Rd, Delaware OH"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="David's Nursery"] <- "3339 Mt Hope Rd, Exmore, VA"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Mid Atlantic Native Plant Farm"] <- "4238 Buckley Hall Road, Cobbs Creek, VA"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Oak Shade Nursery"] <- "434 Oakshade Rd, Shamong, NJ 08088"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Shrubs and Trees"] <- "110 Turnpike Rd #9, Southborough, MA"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Winghaven Nursery"] <- "115 T516-1, Coburn, PA"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Miller Hill Farm"] <- "2127 VT-73, Sudbury, VT"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Kube Pak"] <- "194 Route 526, Allentown, NJ"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Wild Ridge Plants"] <- "170 Mountain Rd, Alpha, NJ 08865"
merged_modern2$NurseryLocation[merged_modern2$NurseryName=="Willo'dell Nursery"] <- "1398 US-42, Ashland, OH"


#geocode addresses
nursery_locations<-merged_modern2 %>%
  select(NurseryName,NurseryLocation,State) %>%
  distinct()
Sys.setenv(GEOCODIO_API_KEY = "f06fb396216b3a9c6f26a6e66c2913abc0fd160")
nursery_coords<- geo(address=nursery_locations$NurseryLocation,method="geocodio",api_options=list(geocodio_v="1.7"),verbose=TRUE)
colnames(nursery_coords)[1]<-"NurseryLocation"
nursery_locations<-cbind(nursery_locations,nursery_coords)
nursery_locations<-nursery_locations[-c(2,4)]

#Remove more herbs/crops
merged_modern2<-merged_modern2[!merged_modern2$AcceptedName=="Capsicum baccatum",] #cultivated pepper
merged_modern2<-merged_modern2[!merged_modern2$AcceptedName=="Davallia solida",] #houseplant
merged_modern2<-merged_modern2[!merged_modern2$AcceptedName=="Capsicum annuum",] #cultivated pepper
merged_modern2<-merged_modern2[!merged_modern2$AcceptedName=="Phaseolus vulgaris",] #cultivated bean
merged_modern2<-merged_modern2[!merged_modern2$AcceptedName=="Solanum lycopersicum",] #cultivated tomato
merged_modern2<-merged_modern2[!merged_modern2$AcceptedName=="Solanum melongena",] #cultivated eggplant
merged_modern2<-merged_modern2[!merged_modern2$AcceptedName=="Capsicum frutescens",] #cultivated pepper
merged_modern2<-merged_modern2[!merged_modern2$AcceptedName=="Glycine max",] #cultivated soybean
merged_modern2<-merged_modern2[!merged_modern2$AcceptedName=="Phaseolus lunatus",] #cultivated lima bean #3324 species, 339 nurseries left

#status updates
merged_modern2$combo_status[merged_modern2$AcceptedName=="Ranunculus acris"]<-"introduced" #introduced, not native
merged_modern2$combo_status[merged_modern2$AcceptedName=="Achillea tomentosa"]<-"introduced" #introduced, not native
merged_modern2$combo_status[merged_modern2$AcceptedName=="Alchemilla alpina"]<-"introduced" #introduced, not native
merged_modern2$combo_status[merged_modern2$AcceptedName=="Humulus lupulus"]<-"established" #established per RIIS, not native
merged_modern2$combo_status[merged_modern2$AcceptedName=="Tilia tomentosa"]<-"established" #established per RIIS, not native
merged_modern2$combo_status[merged_modern2$AcceptedName=="Cornus alba"]<-"introduced" #introduced, not native
merged_modern2$combo_status[merged_modern2$AcceptedName=="Rumex acetosa"]<-"invasive" #invasive per RIIS, not native

#label nurseries----
#label nurseries selling >80% native species as native nurseries
nurserytype<-merged_modern2 %>%
  select(AcceptedName,NurseryName,State,combo_status) %>%
  distinct() %>%
  group_by(NurseryName,State,combo_status) %>%
  summarize(n_combostatus=n()) 
all_categories <- c("native", "established", "introduced", "invasive")
complete_data <- nurserytype %>%
  select(NurseryName) %>%
  distinct() %>%
  crossing(combo_status = all_categories)
expanded_data <- complete_data %>%
  left_join(nurserytype, by = c("NurseryName", "State", "combo_status")) %>%
  replace_na(list(n_combostatus = 0, total_spp = 0)) %>%
  ungroup() %>%
  group_by(NurseryName,State) %>%
  mutate(total_spp=sum(n_combostatus))
pNat<-expanded_data %>%
  filter(combo_status=="native") %>%
  mutate(percNat=n_combostatus/total_spp)
pNat$NurseryTypeNew<-NA
pNat$NurseryTypeNew[pNat$percNat>=0.8]<-"native"
pNat$NurseryTypeNew[pNat$percNat<0.8]<-"traditional"
NurseryCat<-pNat %>%
  select(NurseryName,State,total_spp,NurseryTypeNew)
merged_modern2<-left_join(merged_modern2,NurseryCat,by=c("NurseryName","State"))
merged_modern2<-left_join(merged_modern2,nursery_locations,by=c("NurseryName","State"))

#Add species regulation data----
regulations<-read.csv('RegulatedPlantsByState.csv')
regulations<-regulations[c(1,3,4)]

#Fix taxonomy
#names to match
tnrs_data<-data.frame(AcceptedName=unique(regulations$Clean.Scientific.Name))
tnrs_data$ID <- seq.int(nrow(tnrs_data))

tnrs_data <- tnrs_data[,c(2,1)]
tnrs_data <- rename(tnrs_data, "Scientific Name" ="AcceptedName")

tnrs_results <- TNRS::TNRS(tnrs_data, sources = c("wfo"), accuracy = 0.9)
tnrs_results <- tnrs_results[,c(2,34,41)]
colnames(tnrs_results)<-c("Clean.Scientific.Name","TNRS_AcceptedName","TNRS_Family")

regulations<-left_join(regulations,tnrs_results,by="Clean.Scientific.Name")
regulations$TNRS_AcceptedName[regulations$TNRS_AcceptedName==""]<-NA
length(unique(regulations$Clean.Scientific.Name[is.na(regulations$TNRS_AcceptedName)])) #221 unique unresolved names
colnames(regulations)[2]<-"OriginalName"
colnames(regulations)[4]<-"AcceptedName"
regulations$AcceptedName[is.na(regulations$AcceptedName)]<-regulations$OriginalName[is.na(regulations$AcceptedName)]

#Cut to just genus/species (unless hybrid)
# Assuming df is your data frame and your_column is the column you want to modify
regulations$AcceptedName <- sapply(strsplit(regulations$AcceptedName, " "), function(x) {
  if (any(grepl("^x$", x))) {
    # If any element is exactly "x", keep the entire string
    paste(x, collapse = " ")
  } else {
    # Otherwise, keep only the first two words
    paste(x[1:min(2, length(x))], collapse = " ")
  }
})
#regulations<-regulations[c(1,3,4)]
regulations_summary <- regulations %>%
  group_by(AcceptedName) %>%
  summarize(StatesRegulated = paste(unique(State), collapse = ", "))

species<-merged_modern2 %>%
  select(AcceptedName,combo_status,State) %>%
  distinct()

test<-left_join(species,regulations_summary,by="AcceptedName")

spp_regulations_L48<-test %>% select(AcceptedName,StatesRegulated) %>% distinct()

merged_modern3<-left_join(merged_modern2,spp_regulations_L48,by="AcceptedName")

#Add native ranges within the U.S. (native species)----
#setwd("C:/Users/mfertakos/University of Massachusetts/Spatial Ecology Lab/Spatial predictor datasets/WCVP_v11")\
setwd("C:/Users/mfertakos/OneDrive - University of Massachusetts/Documents - Spatial Ecology Lab/Spatial predictor datasets/WCVP_v11")
wcvp_dist<-read.table("wcvp_distribution.csv", sep="|", header=TRUE, quote = "", fill=TRUE, encoding = "UTF-8") 
wcvp_dist_f <- wcvp_dist %>%
  filter(continent=='NORTHERN AMERICA') %>% #only want US
  select(plant_name_id,continent,area_code_l3,area,introduced)

wcvp_names<-read.table("wcvp_names.csv", sep="|", header=TRUE, quote = "", fill=TRUE, encoding = "UTF-8")
wcvp_names_f<-wcvp_names %>%
  select(taxon_name,plant_name_id,accepted_plant_name_id) %>%
  filter(!is.na(accepted_plant_name_id)) %>%
  distinct()
colnames(wcvp_names_f)[1]<-"AcceptedName"
state_name_to_abbreviation <- c(
  "Alabama" = "AL",
  "Alaska" = "AK",
  "Arizona" = "AZ",
  "Arkansas" = "AR",
  "California" = "CA",
  "Colorado" = "CO",
  "Connecticut" = "CT",
  "Delaware" = "DE",
  "Florida" = "FL",
  "Georgia" = "GA",
  "Hawaii" = "HI",
  "Idaho" = "ID",
  "Illinois" = "IL",
  "Indiana" = "IN",
  "Iowa" = "IA",
  "Kansas" = "KS",
  "Kentucky" = "KY",
  "Louisiana" = "LA",
  "Maine" = "ME",
  "Maryland" = "MD",
  "Massachusetts" = "MA",
  "Michigan" = "MI",
  "Minnesota" = "MN",
  "Mississippi" = "MS",
  "Missouri" = "MO",
  "Montana" = "MT",
  "Nebraska" = "NE",
  "Nevada" = "NV",
  "New Hampshire" = "NH",
  "New Jersey" = "NJ",
  "New Mexico" = "NM",
  "New York" = "NY",
  "North Carolina" = "NC",
  "North Dakota" = "ND",
  "Ohio" = "OH",
  "Oklahoma" = "OK",
  "Oregon" = "OR",
  "Pennsylvania" = "PA",
  "Rhode I." = "RI",
  "South Carolina" = "SC",
  "South Dakota" = "SD",
  "Tennessee" = "TN",
  "Texas" = "TX",
  "Utah" = "UT",
  "Vermont" = "VT",
  "Virginia" = "VA",
  "Washington" = "WA",
  "West Virginia" = "WV",
  "Wisconsin" = "WI",
  "Wyoming" = "WY"
)

wcvp_dist_f$area <- state_name_to_abbreviation[wcvp_dist_f$area]
wcvp_dist_f<-wcvp_dist_f %>%
  filter(!is.na(area)) %>%
  filter(introduced=="0") #remove states where introduced
wcvp_dist_f<-wcvp_dist_f %>%
  group_by(plant_name_id) %>%
  summarise(concatenated_area = toString(unique(area)))

NativeTable<-merged_modern2 %>%
  select(AcceptedName,combo_status) %>%
  distinct() %>%
  filter(combo_status=="native")

Natives_joined<-left_join(NativeTable,wcvp_names_f,by="AcceptedName") #add name codes to natives of interest
Natives_joined<-left_join(Natives_joined,wcvp_dist_f,by="plant_name_id")#add native distributions by name code
Natives_joined_t <- Natives_joined %>%
  group_by(AcceptedName) %>%
  filter(!is.na(concatenated_area) | all(is.na(concatenated_area)) | (n() == 1 & is.na(concatenated_area))) #keep only rows with area data, except species with no data in any row
#fill remainder NAs by accepted name ID instead of AcceptedName
Natives_joined_t <- Natives_joined_t %>%
  mutate(concatenated_area = ifelse(is.na(concatenated_area),
                                    wcvp_dist_f$concatenated_area[match(accepted_plant_name_id, wcvp_dist_f$plant_name_id)],
                                    concatenated_area))
Natives_joined_t <- Natives_joined_t %>%
  group_by(AcceptedName) %>%
  filter(!is.na(concatenated_area) | (n() == 1 & is.na(concatenated_area))) %>% #keep only rows with area data, except species with no data in any row
  distinct()
Natives_joined_t<-Natives_joined_t[-c(2,3,4)]
Natives_joined_t<-distinct(Natives_joined_t)

colnames(Natives_joined_t)[2]<-"NativeRange"

merged_modern5<-left_join(merged_modern3,Natives_joined_t,by="AcceptedName")

#add a few native ranges by hand
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Amianthium muscaetoxicum"]<-"MO, OK, NJ, NY, PA, WV, AL, AR, DE, FL, GA, KY, LA, MD, MS, NC, SC, TN, VA" #spelling
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Andropogon gerardii"]<-"CO, MT, WY, IL, IA, KS, MN, MO, ND, NE, OK, SD, WI, CT, IN, ME, MA, MI, NH, NJ, NY, OH, PA, RI, VT, WV, AZ, UT, NM, TX, AL, AR, DE, FL, GA, KY, LA, MD, MS, NC, SC, TN, VA" #spelling
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Arabis caucasica"]<- "ME, NY, VA, KY, TN, MI, WI" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Campanula rotundifolia"]<-"ME, NH, VT, MA, CT, NY, NJ, PA, MD, WV, VA, NC, TN, OH, IN, MI, IL, MN, WI, IA, ND, SD, NE, CO, WY, NM, TX, AZ, CA, WA, OR, MO, ID, UT" #from USDA PLANTS 
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Cerastium arvense"]<-"ME, NH, VT, MA, CT, RI, NY, NJ, PA, MD, DE, WV, VA, KY, TN, GA, MS, OH, IN, MI, IL, MN, WI, IA, ND, SD, NE, CO, WY, NM, NV, AZ, CA, WA, OR, MO, ID, UT" #from USDA PLANTS (I/N plant)
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Circaea lutetiana"]<-"AL, AR, CT, DE, GA, IL, IN, IA, KS, KY, LA, ME, MD, MA, MI, MN, MS, MO, NE, NH, NJ, NY, NC, ND, OH, OK, PA, RI, SC, SD, TN, VT, VA, WV, WI, WY" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Clinopodium vulgare"]<-"AZ, AR, CO, CT, DE, IL, IN, IA, KS, KY, ME, MD, MA, MI, MN, NH, NJ, NM, NY, NC, OH, OR, PA, RI, TN, UT, VT, VA, WA, WV, WI" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Dryopteris goldiana"]<-"IL, IA, MN, MO, WI, CT, IN, ME, MA, MI, NH, NJ, NY, OH, PA, RI, VT, WV, AL, AR, DE, GA, KY, MD, NC, SC, TN, VA" #spelling
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Equisetum hyemale"]<-"AL, AZ, AR, CA, CO, CT, DE, FL, GA, ID, IL, IN, IA, KS, KY, LA, ME, MD, MA, MI, MN, MS, MO, MT, NE, NV, NH, NJ, NM, NY, NC, ND, OH, OK, OR, PA, RI, SC, SD, TN, TX, UT, VT, VA, WA, WV, WI, WY" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Juncus gerardii"]<-"CO, MT, OR, WA, IL, KS, MN, MO, ND, WI, CT, IN, ME, MA, MI, NH, NJ, NY, OH, PA, RI, VT, UT, DE, KY, MD, VA" #spelling
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Melampodium divaricatum"]<-"FL" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Mentha arvensis"]<-"AZ, AR, CA, CO, CT, DE, GA, ID, IL, IN, IA, KS, KY, ME, MD, MA, MI, MN, MO, MT, NE, NV, NH, NJ, NM, NY, NC, ND, OH, OR, PA, RI, SD, TN, TX, UT, VT, VA, WA, WV, WI, WY" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Myosotis alpestris"]<-"WA, OR, ID, MT, WY, CO" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Nuphar lutea"]<-"AL, AZ, AR, CA, CO, CT, DE, FL, GA, ID, IL, IN, IA, KS, KY, LA, ME, MD, MA, MI, MN, MS, MO, MT, NE, NV, NH, NJ, NM, NY, NC, ND, OH, OK, OR, PA, RI, SC, SD, TN, TX, UT, VT, VA, WA, WV, WI, WY" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Onoclea struthiopteris"]<-"CT, IL, IN, IA, ME, MD, MA, MI, MN, MO, NE, NH, NJ, NY, ND, OH, PA, RI, SD, VT, VA, WV, WI" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Osmunda regalis"]<-"AL, AR, CT, DE, FL, GA, IL, IN, IA, KS, KY, LA, ME, MD, MA, MI, MN, MS, MO, NH, NJ, NY, NC, OH, OK, PA, RI, SC, TN, TX, VT, VA, WV, WI" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Pinus strobiformis"]<-"TX, NM, AZ, CO" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Potentilla neumanniana"]<-"MA, CT" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Pteridium aquilinum"]<-"AL, AZ, AR, CA, CO, CT, DE, FL, GA, ID, IL, IN, IA, KS, KY, LA, ME, MD, MA, MI, MN, MS, MO, MT, NV, NH, NJ, NM, NY, NC, ND, OH, OK, OR, PA, RI, SC, SD, TN, TX, UT, VT, VA, WA, WV, WI, WY" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Pulsatilla patens"]<-"WA, ID, MT, WY, UT, CO, NM, TX, ND, SD, NE, KS, IA, MN, WI, IL, MI" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Sambucus nigra"]<-"AL, AZ, AR, CA, CO, CT, DE, FL, GA, ID, IL, IN, IA, KS, KY, LA, ME, MD, MA, MI, MN, MS, MO, MT, NE, NV, NH, NJ, NM, NY, NC, ND, OH, OK, OR, PA, RI, SC, SD, TN, TX, UT, VT, VA, WA, WV, WI, WY" #from USDA PLANTS I/N
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Senna marylandica"]<-"IL, WI, NY, TX, AL, FL, KY, MD, TN" #spelling
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Spiraea betulifolia"]<-"OR, ID, MT, MN, NC" #from USDA PLANTS
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Urtica dioica"]<-"AL, AZ, CA, CO, CT, DE, FL, GA, ID, IL, IN, IA, KS, KY, LA, ME, MD, MA, MI, MN, MS, MO, MT, NE, NV, NH, NJ, NM, NY, NC, ND, OH, OK, OR, PA, RI, SC, SD, TN, TX, UT, VT, VA, WA, WV, WI, WY" #from USDA PLANTS I/N
merged_modern5$NativeRange[merged_modern5$AcceptedName == "Viburnum opulus"]<-"CT, ID, IL, IN, IA, KY, ME, MD, MA, MI, MN, MO, MT, NE, NH, NJ, NM, NY, ND, OH, PA, RI, SD, TN, VT, VA, WA, WV, WI, WY" #from USDA PLANTS I/N


#Add states where sold ----
merged_modern6 <- merged_modern5 %>%
  ungroup() %>%
  group_by(AcceptedName) %>%
  mutate(StatesSold = paste(unique(State), collapse = ", "))

#Categorize regulated non-natives as invasive----
merged_modern6$combo_status[!merged_modern6$combo_status=="native" & !is.na(merged_modern6$StatesRegulated)]<-"invasive" #non-native species which are regulated in at least 1 state in L48 = invasive
merged_modern6<-merged_modern6 %>%
  filter(!AcceptedName=='x Hesperotropsis leylandii') #remove (hybrid) 3323 spp

#Recategorize native species ----
#Native: Native to at least one state in the Eastern Temperate Forest Ecoregion
#L48 Native:

native_subset<-merged_modern6 %>%
  filter(combo_status=="native") %>%
  select(AcceptedName,NativeRange) %>%
  distinct()

states2keep<-c("ME","NH","VT","MA","NY","CT","NJ","PA","OH","WV","VA","DE","MD","NC","SC","GA","FL","AL","KY","MI","IN","TN","MS","RI","IL","WI","MN","MO","AR","LA")#list of states each species should be in at least
filtered_native_subset <- native_subset %>%
  filter(
    sapply(NativeRange, function(range) {
      any(states2keep %in% strsplit(range, ", ")[[1]])
    })
  )
merged_modern7<-merged_modern6
merged_modern7$combo_status[merged_modern7$combo_status=="native" & merged_modern7$AcceptedName %in% filtered_native_subset$AcceptedName]<-"RegionalNative"
`%notin%` <- Negate(`%in%`)
merged_modern7$combo_status[merged_modern7$combo_status=="native" & merged_modern7$AcceptedName %notin% filtered_native_subset$AcceptedName]<-"OutsideNative"

#Add growth form for missing native species ----
merged_modern7$Habit[merged_modern7$AcceptedName=="Anemonastrum canadense"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Homalosorus pycnocarpos"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Hydrangea barbara"]<-"Vine"
merged_modern7$Habit[merged_modern7$AcceptedName=="Ionactis linariifolia"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Lysimachia borealis"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Primula meadia"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Smallanthus uvedalia"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Zephyranthes atamasco"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Amianthium muscaetoxicum"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Senna marylandica"]<-"Forb/herb"

#filter to just first growth form listed
merged_modern7 <- merged_modern7 %>%
  mutate(Habit = case_when(
    Habit == "Vine" ~ "Vine",
    Habit == "Graminoid" ~ "Grass",
    Habit == "Shrub, Tree" ~ "Shrub",
    Habit == "Forb/herb" ~ "Forb/herb",
    Habit == "Forb/herb, Subshrub" ~ "Forb/herb",
    Habit == "Shrub, Subshrub" ~ "Shrub",
    Habit == "Tree" ~ "Tree",
    Habit == "Shrub, Tree, Vine" ~ "Shrub",
    Habit == "Forb/herb, Vine" ~ "Forb/herb",
    Habit == "Shrub" ~ "Shrub",
    Habit == "Forb/herb, Shrub, Vine" ~ "Forb/herb",
    Habit == "Shrub, Vine" ~ "Shrub",
    Habit == "Subshrub" ~ "Shrub",
    Habit == "Graminoid, Shrub, Subshrub" ~ "Grass",
    Habit == "Forb/herb, Shrub, Subshrub" ~ "Forb/herb",
    Habit == "Forb/herb, Shrub" ~ "Forb/herb",
    Habit == "Shrub, Subshrub, Vine" ~ "Shrub",
    Habit == "Shrub, Subshrub, Tree" ~ "Shrub",
    Habit == "Forb/herb, Subshrub, Vine" ~ "Forb/herb",
    Habit == "Subshrub, Vine" ~ "Shrub",
    TRUE ~ Habit  # Default case to keep original value if no match
  ))

#fix growth forms of those with 2 different ones still
merged_modern7$Habit[merged_modern7$AcceptedName=="Carex sprengelii"]<-"Grass"
merged_modern7$Habit[merged_modern7$AcceptedName=="Carex squarrosa"]<-"Grass"
merged_modern7$Habit[merged_modern7$AcceptedName=="Eragrostis spectabilis"]<-"Grass"
merged_modern7$Habit[merged_modern7$AcceptedName=="Potentilla simplex"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Quercus muehlenbergii"]<-"Tree"
merged_modern7$Habit[merged_modern7$AcceptedName=="Ruellia humilis"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Symphyotrichum puniceum"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Diarrhena obovata"]<-"Grass"
merged_modern7$Habit[merged_modern7$AcceptedName=="Rudbeckia missouriensis"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Symphyotrichum sericeum"]<-"Forb/herb"
merged_modern7$Habit[merged_modern7$AcceptedName=="Pinus strobiformis"]<-"Tree"
merged_modern7$Habit[merged_modern7$AcceptedName=="Muhlenbergia reverchonii"]<-"Grass"
merged_modern7$Habit[merged_modern7$AcceptedName=="Symphyotrichum sericeum"]<-"Grass"

#Add global status of species ----
#add if invasive or established anywhere in the world
globalinv<-read.csv("TwentiesRule.csv")
globalinv<-globalinv %>%
  select(Accepted_name,status) %>%
  distinct()
load('twenties_names_accepted2.rda') #file with TNRS resolved names
colnames(globalinv)[1]<-"SearchedName"
globalinv<-plyr::join(globalinv,tnrs_results_t, by = "SearchedName",type="left",match="first") #only takes the first match
globalinv$AcceptedName<-ifelse(is.na(globalinv$AcceptedName),globalinv$SearchedName,globalinv$AcceptedName)
globalinv$AcceptedName <- sapply(strsplit(globalinv$AcceptedName, " "), function(x) {
  if (any(grepl("^x$", x))) {
    # If any element is exactly "x", keep the entire string
    paste(x, collapse = " ")
  } else {
    # Otherwise, keep only the first two words
    paste(x[1:min(2, length(x))], collapse = " ")
  }
})
globalinv <- globalinv %>%
  ungroup()%>%
  group_by(AcceptedName) %>%
  slice_max(order_by = status == "invasive", with_ties = FALSE) %>%
  ungroup() #deals with duplicates, keeping invasive row if est&inv are present in duplicate rows
globalinv<-globalinv[-1]
colnames(globalinv)<-c("GlobalStatus","AcceptedName")
globalinv<-distinct(globalinv)
merged_modern7<-left_join(merged_modern7,globalinv,by="AcceptedName")
merged_modern7<-merged_modern7[!merged_modern7$AcceptedName=="Capsicum chinense",] #crop (3322 species final)

#fix cultivar names which are close and the same (spelling mistakes, etc.)
cults<-merged_modern7 %>% select(AcceptedName,IsCultivar,CultivarName) %>% filter(IsCultivar=="yes") %>% distinct() %>% arrange(AcceptedName,CultivarName)

merged_modern7$CultivarName[merged_modern7$AcceptedName=="Delphinium elatum" & merged_modern7$CultivarName=="magic fountain dark blue dark bee"] <- "magic fountains dark blue dark bee"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Delphinium elatum" & merged_modern7$CultivarName=="magic fountain sky blue white bee"] <- "magic fountains sky blue white bee"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Spiraea japonica" & merged_modern7$CultivarName=="double play pained lady"] <- "double play painted lady"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Syringa vulgaris" & merged_modern7$CultivarName=="fiala rememberance"] <- "fiala remembrance"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Weigela florida" & merged_modern7$CultivarName=="czechmark triology"] <- "czechmark trilogy"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Hydrangea paniculata" & merged_modern7$CultivarName=="fire light tidbit"] <- "firelight tidbit"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Hydrangea paniculata" & merged_modern7$CultivarName=="little quick fire"] <- "little quickfire"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Aronia melanocarpa" & merged_modern7$CultivarName=="lo scape mound"] <- "low scape mound"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Aronia melanocarpa" & merged_modern7$CultivarName=="lowscape mound"] <- "low scape mound"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Astilbe rubra" & merged_modern7$CultivarName=="visions in pink"] <- "vision in pink"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Cercis canadensis" & merged_modern7$CultivarName=="appalachia red"] <- "appalachian red"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Deschampsia cespitosa" & merged_modern7$CultivarName=="northern light"] <- "northern lights"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Euonymus fortunei" & merged_modern7$CultivarName=="emerald n gold"] <- "emerald n' gold"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Euonymus japonicus" & merged_modern7$CultivarName=="aureo marginata"] <- "aureomarginata"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Euonymus japonicus" & merged_modern7$CultivarName=="aureo variegata"] <- "aureovariegata"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Hosta sieboldiana" & merged_modern7$CultivarName=="aureo marginata"] <- "aureomarginata"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Hydrangea macrophylla" & merged_modern7$CultivarName=="endless summer "] <- "endless summer"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Hydrangea macrophylla" & merged_modern7$CultivarName=="endless summer twist n shout"] <- "endless summer twist and shout"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Hydrangea paniculata" & merged_modern7$CultivarName=="lava lamp flare"] <- "lavalamp flare"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Iris pallida" & merged_modern7$CultivarName=="aureo variegata"] <- "aureovariegata"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Rudbeckia triloba" & merged_modern7$CultivarName=="black jack gold"] <- "blackjack gold"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Buddleja davidii" & merged_modern7$CultivarName=="black night"] <- "black knight"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Hakonechloa macra" & merged_modern7$CultivarName=="albo striata"] <- "albostriata"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Phlox paniculata" & merged_modern7$CultivarName=="up town girl"] <- "uptown girl"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Salvia indica" & merged_modern7$CultivarName=="iniglo girl"] <- "indiglo girl"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Abies koreana" & merged_modern7$CultivarName=="ice breaker"] <- "icebreaker"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Athyrium niponicum" & merged_modern7$CultivarName=="apple court"] <- "applecourt"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Cenchrus orientalis" & merged_modern7$CultivarName=="karly rose"] <- "karley rose"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Enkianthus campanulatus" & merged_modern7$CultivarName=="summer hill"] <- "summerhill"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Euonymus japonicus" & merged_modern7$CultivarName=="green spire"] <- "greenspire"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Heuchera americana" & merged_modern7$CultivarName=="coralbells"] <- "coral bells"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Juncus inflexus" & merged_modern7$CultivarName=="blue arrow"] <- "blue arrows"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Miscanthus sinensis" & merged_modern7$CultivarName=="gracillmus"] <- "gracillimus"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Physocarpus opulifolius" & merged_modern7$CultivarName=="ginger wine"] <- "gingerwine"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Picea abies" & merged_modern7$CultivarName=="cupressiana"] <- "cupressina"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Prunus laurocerasus" & merged_modern7$CultivarName=="otto luken"] <- "otto luyken"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Salvia nemorosa" & merged_modern7$CultivarName=="bumble snow"] <- "bumblesnow"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor 1"] <- "wolff" #
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor 2"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor 3"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor 12"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor 10"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor 11"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor 4"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor 5"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor 6"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor 7"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor 8"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="emperor 9"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer palmatum" & merged_modern7$CultivarName=="red emperor"] <- "wolff"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer rubrum" & merged_modern7$CultivarName=="bradywine"] <- "brandywine"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer rubrum" & merged_modern7$CultivarName=="red pointe"] <- "redpointe"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Acer rubrum" & merged_modern7$CultivarName=="sun valley"] <- "sunvalley"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Akebia quinata" & merged_modern7$CultivarName=="shiro bana"] <- "shirobana"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Cenchrus setaceus" & merged_modern7$CultivarName=="sky rocket"] <- "skyrocket"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Dianthus gratianopolitanus" & merged_modern7$CultivarName=="fire witch"] <- "firewitch"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Hydrangea paniculata" & merged_modern7$CultivarName=="fire light"] <- "firelight"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Hydrangea quercifolia" & merged_modern7$CultivarName=="jet stream"] <- "jetstream"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Iris sibirica" & merged_modern7$CultivarName=="sun fisher"] <- "sunfisher"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Juniperus scopulorum" & merged_modern7$CultivarName=="sky rocket"] <- "skyrocket"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Lamprocapnos spectabilis" & merged_modern7$CultivarName=="gold heart"] <- "goldheart"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Nandina domestica" & merged_modern7$CultivarName=="fire power"] <- "firepower"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Panicum virgatum" & merged_modern7$CultivarName=="north wind"] <- "northwind"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Perovskia atriplicifolia" & merged_modern7$CultivarName=="lacy blue"] <- "lacey blue"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Prunus avium" & merged_modern7$CultivarName=="black gold"] <- "blackgold"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Prunus avium" & merged_modern7$CultivarName=="black york"] <- "blackyork"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Prunus avium" & merged_modern7$CultivarName=="white gold"] <- "whitegold"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Salvia nemorosa" & merged_modern7$CultivarName=="carradonna"] <- "caradonna"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Solidago rugosa" & merged_modern7$CultivarName=="fire works"] <- "fireworks"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Spiraea japonica" & merged_modern7$CultivarName=="gold mound"] <- "goldmound"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Stokesia laevis" & merged_modern7$CultivarName=="mels blue"] <- "mel's blue"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Taxus cuspidata" & merged_modern7$CultivarName=="green wave"] <- "greenwave"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Chamaecyparis pisifera" & merged_modern7$CultivarName=="filifera golden mop"] <- "filifera gold mop" #
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Impatiens hawkeri" & merged_modern7$CultivarName=="super sonic pirple"] <- "super sonic purple"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Liquidambar styraciflua" & merged_modern7$CultivarName=="slender silouhette"] <- "slender silhouette"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Spiraea japonica" & merged_modern7$CultivarName=="couble play doozie"] <- "double play doozie"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Armeria maritima" & merged_modern7$CultivarName=="spendens"] <- "splendens"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Boltonia asteroides" & merged_modern7$CultivarName=="snow bank"] <- "snowbank"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Brunnera macrophylla" & merged_modern7$CultivarName=="varigata"] <- "variegata"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Chamaecyparis pisifera" & merged_modern7$CultivarName=="gold mop"] <- "gold mop" #
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Coreopsis verticillata" & merged_modern7$CultivarName=="moon beam"] <- "moonbeam"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Erica cinerea" & merged_modern7$CultivarName=="c d eason"] <- "cd eason"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Hydrangea serrata" & merged_modern7$CultivarName=="blue bird"] <- "bluebird"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Iris ensata" & merged_modern7$CultivarName=="varigata"] <- "variegata"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Juniperus virginiana" & merged_modern7$CultivarName=="canertii"] <- "canaertii"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Juniperus virginiana" & merged_modern7$CultivarName=="idylwild"] <- "idyllwild"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Miscanthus sinensis" & merged_modern7$CultivarName=="dixeland"] <- "dixieland"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Molinia caerulea" & merged_modern7$CultivarName=="sky racer"] <- "skyracer"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Pinus mugo" & merged_modern7$CultivarName=="white bud"] <- "whitebud"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Pyrus communis" & merged_modern7$CultivarName=="moon glow"] <- "moonglow"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Salvia nemorosa" & merged_modern7$CultivarName=="cardonna"] <- "caradonna"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Symphyotrichum laeve" & merged_modern7$CultivarName=="blue bird"] <- "bluebird"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Vaccinium corymbosum" & merged_modern7$CultivarName=="blue crop"] <- "bluecrop"
merged_modern7$CultivarName[merged_modern7$AcceptedName=="Vaccinium corymbosum" & merged_modern7$CultivarName=="blue gold"] <- "bluegold"
merged_modern7$CultivarName<-trimws(merged_modern7$CultivarName, which="right") #remove trailing spaces from cultivar names
merged_modern7$CultivarName<-trimws(merged_modern7$CultivarName, which="left") #remove leading spaces from cultivar names
merged_modern7$combo_status[merged_modern7$combo_status=="establiushed"]<-"established"
merged_modern7<-merged_modern7 %>%  filter(!grepl(" mix",CultivarName))

cultivar_subset<-merged_modern7 %>% filter(IsCultivar=="yes"|IsCultivar=="no") %>% select(AcceptedName,combo_status,IsCultivar,CultivarName,NurseryName,State,NurseryTypeNew) %>% distinct()

#How many cultivars for EF native versus non-native species?
cult_table<-cultivar_subset %>% dplyr::group_by(AcceptedName,combo_status) %>% dplyr::summarise(n_cults=n_distinct(CultivarName[CultivarName != ""])) #dont count the straight species rows as a cultivar
cult_table$combo_status[cult_table$combo_status=="RegionalNative"]<-"EF native"
cult_table$combo_status[cult_table$combo_status=="OutsideNative"]<-"EF non-native"
cult_table$combo_status<-factor(cult_table$combo_status,levels=c("EF native","EF non-native","introduced","established","invasive"))

cult_table_nurs<-cultivar_subset %>% dplyr::group_by(AcceptedName,combo_status,NurseryTypeNew) %>% dplyr::summarise(n_cults=n_distinct(CultivarName[CultivarName != ""])) #dont count the straight species rows as a cultivar
cult_table_nurs$combo_status[cult_table_nurs$combo_status=="RegionalNative"]<-"EF native"
cult_table_nurs$combo_status[cult_table_nurs$combo_status=="OutsideNative"]<-"EF non-native"
cult_table_nurs$NurseryTypeNew[cult_table_nurs$NurseryTypeNew=="native"]<-"native specialty"
cult_table_nurs$NurseryTypeNew[cult_table_nurs$NurseryTypeNew=="traditional"]<-"conventional"
cult_table_nurs$combo_status<-factor(cult_table_nurs$combo_status,levels=c("EF native","EF non-native","introduced","established","invasive"))

#check normality for statistical test
cult_table_nurs %>%
  group_by(combo_status,NurseryTypeNew) %>%
  summarise(shapiro_p_value = shapiro.test(n_cults)$p.value) #not normal use a wilcox test
#Is there a difference by native/invasive status and nursery type? (Fig S3c)
my_comparisons3<-list(c("conventional","native specialty"))
cult1<-ggplot(cult_table_nurs, aes(x = NurseryTypeNew, y = n_cults, fill=NurseryTypeNew)) +
  geom_boxplot( width=0.75, size=0.5,outlier.size=0.5) +
  #geom_jitter(width=0.3, height=0, aes(color=combo_status),alpha=0.5,shape=16, size=2)+
  facet_wrap(~combo_status, nrow=1)+
  theme_bw() +
  scale_y_continuous(trans=scales::pseudo_log_trans(base=10),breaks=c(1,10,100,1000))+
  scale_x_discrete(labels=c("conventional", "native\nspecialty"))+
  labs(
    x = "Nursery Type",
    y = "Log Number of Cultivars"
  ) +
  #stat_compare_means(method="wilcox.test",comparisons=my_comparisons3)+
  theme(
    axis.text.x = element_text(size=8),
    #plot.title = element_text(size = 14, face = "bold"),
    legend.position = "none", panel.grid = element_blank()
  )

#Do conventional nurseries sell more cultivars per species overall? Fig. S3b
my_comparisons2<-list(c("native specialty","conventional"))
cult2<-ggplot(cult_table_nurs, aes(x = NurseryTypeNew, y = n_cults, fill=NurseryTypeNew)) +
  geom_boxplot( width=0.75, size=0.5,outlier.size=0.5) +
  #geom_jitter(width=0.3, height=0, aes(color=combo_status),alpha=0.5,shape=16, size=2)+
  theme_bw() +
  scale_y_continuous(trans=scales::pseudo_log_trans(base=10),breaks=c(1,10,100,1000))+
  scale_x_discrete(labels=c("conventional", "native\nspecialty"))+
  labs(
    x = "Nursery Type",
    y = "Log Number of Cultivars"
  ) +
  #stat_compare_means(method="wilcox.test",comparisons=my_comparisons2)+
  theme(
    axis.text.x = element_text(size=8),
    #plot.title = element_text(size = 14, face = "bold"),
    legend.position = "none", panel.grid = element_blank()
  )

#Is there a difference by native/invasive status? (use cult_table here bc not split by nursery type) Fig. S3a
my_comparisons<-list(c("EF native","EF non-native"),c("EF native","established"),c("EF native","introduced"),c("EF native","invasive"),c("EF non-native","established"),c("EF non-native","introduced"),c("EF non-native","invasive"),c("established","introduced"),c("established","invasive"),c("introduced","invasive"))
#add column to fill based off of
cult_table<-cult_table %>%
  mutate(type = case_when(
    combo_status == "EF native" ~ "a",
    combo_status == "EF non-native" ~ "b",
    combo_status == "introduced" ~ "b",
    combo_status == "established" ~ "c",
    combo_status == "invasive" ~ "c"
  ))

cult3<-ggplot(cult_table, aes(x = combo_status, y = n_cults, fill=type)) +
  geom_boxplot( width=0.75, size=0.5,outlier.size=0.5) +
  #geom_jitter(width=0.3, height=0, aes(color=combo_status),alpha=0.5,shape=16, size=2)+
  theme_bw() +
  scale_y_continuous(trans=scales::pseudo_log_trans(base=10),breaks=c(1,10,100,1000))+
  scale_x_discrete(labels=c("EF native", "EF non-\nnative", "introduced  ", "established", "invasive"))+
  labs(
    x = "Native/Invasive Status",
    y = "Log Number of Cultivars"
  ) +
  #stat_compare_means(method="wilcox.test",comparisons=my_comparisons,label="p.signif")+
  theme(
    axis.text.x = element_text(size=8),
    #plot.title = element_text(size = 14, face = "bold"),
    legend.position = "none", panel.grid = element_blank()
  )

library(gridExtra)
p1<-grid.arrange(cult3,cult2,ncol=2)
p2<-grid.arrange(cult1,ncol=1)
#grid.arrange(p1,p2,nrow=2)

png("cult_fig.png", width = 180, height = 180, units="mm",res=600)
grid.arrange(p1,p2,nrow=2)
dev.off()


#What proportion of native species in EF forest region are sold? 
wcvp_dist<-read.table("wcvp_distribution.csv", sep="|", header=TRUE, quote = "", fill=TRUE, encoding = "UTF-8") 
wcvp_dist_f <- wcvp_dist %>%
  filter(continent=='NORTHERN AMERICA') %>% #only want US
  select(plant_name_id,continent,area_code_l3,area,introduced)

wcvp_names<-read.table("wcvp_names.csv", sep="|", header=TRUE, quote = "", fill=TRUE, encoding = "UTF-8")
wcvp_names_f<-wcvp_names %>%
  select(taxon_name,plant_name_id,accepted_plant_name_id) %>%
  filter(!is.na(accepted_plant_name_id)) %>%
  distinct()
colnames(wcvp_names_f)[1]<-"AcceptedName"
state_name_to_abbreviation <- c(
  "Alabama" = "AL",
  "Alaska" = "AK",
  "Arizona" = "AZ",
  "Arkansas" = "AR",
  "California" = "CA",
  "Colorado" = "CO",
  "Connecticut" = "CT",
  "Delaware" = "DE",
  "Florida" = "FL",
  "Georgia" = "GA",
  "Hawaii" = "HI",
  "Idaho" = "ID",
  "Illinois" = "IL",
  "Indiana" = "IN",
  "Iowa" = "IA",
  "Kansas" = "KS",
  "Kentucky" = "KY",
  "Louisiana" = "LA",
  "Maine" = "ME",
  "Maryland" = "MD",
  "Massachusetts" = "MA",
  "Michigan" = "MI",
  "Minnesota" = "MN",
  "Mississippi" = "MS",
  "Missouri" = "MO",
  "Montana" = "MT",
  "Nebraska" = "NE",
  "Nevada" = "NV",
  "New Hampshire" = "NH",
  "New Jersey" = "NJ",
  "New Mexico" = "NM",
  "New York" = "NY",
  "North Carolina" = "NC",
  "North Dakota" = "ND",
  "Ohio" = "OH",
  "Oklahoma" = "OK",
  "Oregon" = "OR",
  "Pennsylvania" = "PA",
  "Rhode I." = "RI",
  "South Carolina" = "SC",
  "South Dakota" = "SD",
  "Tennessee" = "TN",
  "Texas" = "TX",
  "Utah" = "UT",
  "Vermont" = "VT",
  "Virginia" = "VA",
  "Washington" = "WA",
  "West Virginia" = "WV",
  "Wisconsin" = "WI",
  "Wyoming" = "WY"
)

wcvp_dist_f$area <- state_name_to_abbreviation[wcvp_dist_f$area]
states2keep<-c("ME","NH","VT","MA","NY","CT","NJ","PA","OH","WV","VA","DE","MD","NC","SC","GA","FL","AL","KY","MI","IN","TN","MS","RI","IL","WI","MN","MO","AR","LA")#EF states
wcvp_dist_f<-wcvp_dist_f %>%
  filter(!is.na(area)) %>%
  filter(introduced=="0") %>% #remove states where introduced
  filter(area %in% states2keep)
wcvp<-left_join(wcvp_dist_f,wcvp_names_f,by="plant_name_id")
#remove varieties, subspecies, hybrids
wcvp <- wcvp %>%
  filter(!grepl("×",AcceptedName))
wcvp <- wcvp %>%
  filter(!grepl("subsp.",AcceptedName))
wcvp <- wcvp %>%
  filter(!grepl("var.",AcceptedName))
wcvp <- wcvp %>%
  filter(!grepl("f.",AcceptedName))
wcvp <- wcvp %>%
  mutate(AcceptedName = ifelse(grepl("^\\b\\w+\\b$", AcceptedName), NA, AcceptedName))
wcvp<-wcvp[!is.na(wcvp$AcceptedName),]
length(unique(wcvp$AcceptedName)) #5083 unique native straight species in WCVP

#Create Figure 1 ----
pies<-merged_modern7 %>%
  filter(NurseryTypeNew=="traditional")
categories <- pies %>%
  select(AcceptedName,combo_status) %>%
  distinct() %>%
  group_by(combo_status)%>%
  summarise(num_species=n_distinct(AcceptedName))
categories$fraction=categories$num_species/sum(categories$num_species)
categories$ymax=cumsum(categories$fraction)
categories$ymin=c(0,head(categories$ymax,n=-1))
categories$labelPosition<-(categories$ymax+categories$ymin)/2
categories$label <- paste0(categories$combo_status, "\n", categories$num_species)
categories$combo_status <- factor(categories$combo_status, 
                                  levels = c("introduced", "established","invasive","OutsideNative","RegionalNative"))
categories <- categories %>% arrange(combo_status)
# Recalculate ymin and ymax based on the reordered data
categories$ymax <- cumsum(categories$fraction)
categories$ymin <- c(0, head(categories$ymax, n = -1))
categories$labelPosition <- (categories$ymax + categories$ymin) / 2
custom_colors <- c("RegionalNative" = "#FFDCDC", "OutsideNative"="#E9A5A5","invasive" = "#a70000", 
                   "introduced" = "#D36E6E", "established" = "#BD3737")
ggplot(categories, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=combo_status)) +
  geom_rect() +
  geom_text( x=3.5, aes(y=labelPosition, label=label), size=4, fontface="bold") +
  scale_fill_manual(values = custom_colors) + # Use custom colors
  coord_polar(theta="y") +
  xlim(c(2, 4)) +
  theme_void() +
  theme(legend.position = "none")
#no labels version
Fig1B<-ggplot(categories, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=combo_status)) +
  geom_rect() +
  scale_fill_manual(values = custom_colors) + # Use custom colors
  coord_polar(theta="y") +
  xlim(c(2, 4)) +
  theme_void() +
  theme(legend.position = "none")

png("Figure1B.png", width = 60, height = 60, units="mm",res=600)
Fig1B
dev.off()


#Pie chart: Natives
pies<-merged_modern7 %>%
  filter(NurseryTypeNew=="native")
categories <- pies %>%
  select(AcceptedName,combo_status) %>%
  distinct() %>%
  group_by(combo_status)%>%
  summarise(num_species=n_distinct(AcceptedName))
categories$fraction=categories$num_species/sum(categories$num_species)
categories$ymax=cumsum(categories$fraction)
categories$ymin=c(0,head(categories$ymax,n=-1))
categories$labelPosition<-(categories$ymax+categories$ymin)/2
categories$label <- paste0(categories$combo_status, "\n", categories$num_species)
categories$combo_status <- factor(categories$combo_status, 
                                  levels = c("introduced", "established","invasive","OutsideNative","RegionalNative"))
categories <- categories %>% arrange(combo_status)
# Recalculate ymin and ymax based on the reordered data
categories$ymax <- cumsum(categories$fraction)
categories$ymin <- c(0, head(categories$ymax, n = -1))
categories$labelPosition <- (categories$ymax + categories$ymin) / 2
custom_colors <- c("RegionalNative" = "#D1E9FF", "OutsideNative"="#9DC5EA","invasive" = "#0057A9", 
                   "introduced" = "#69A0D4", "established" = "#347CBF")
ggplot(categories, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=combo_status)) +
  geom_rect() +
  geom_text( x=3.5, aes(y=labelPosition, label=label), size=4, fontface="bold") +
  scale_fill_manual(values = custom_colors) + # Use custom colors
  coord_polar(theta="y") +
  xlim(c(2, 4)) +
  theme_void() +
  theme(legend.position = "none")
#No labels version
Fig1C<-ggplot(categories, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=combo_status)) +
  geom_rect() +
  scale_fill_manual(values = custom_colors) + # Use custom colors
  coord_polar(theta="y") +
  xlim(c(2, 4)) +
  theme_void() +
  theme(legend.position = "none")

png("Figure1C.png", width = 60, height = 60, units="mm",res=600)
Fig1C
dev.off()

#Figure 2----
regulation_states <- c("CT", "DE", "KY", "MA", "MD", "ME", "NH", "NJ", "NY", "OH", "PA", "RI", "VA", "VT", "WV")
# Filter the States column to only states in NE
species_regional <- merged_modern7 %>%
  mutate(RegionalStatesRegulated = sapply(str_extract_all(StatesRegulated, str_c(regulation_states, collapse = "|")), 
                                          function(x) str_c(x, collapse = ", ")))
species_regional$RegionalStatesRegulated[species_regional$RegionalStatesRegulated==""]<-NA
species_regional_bad<-species_regional %>%
  select(AcceptedName,combo_status,RegionalStatesRegulated) %>%
  distinct()
#Create table of non-native species being sold
table<-merged_modern7 %>%
  select(AcceptedName,NurseryName,State,combo_status) %>%
  distinct() %>%
  filter(combo_status=="introduced"|combo_status=="established"|combo_status=="invasive") %>%
  group_by(AcceptedName) %>%
  mutate(n_nurseries=n_distinct(paste(NurseryName,State))) #number of nurseries each species is sold at

species_regional_bad<-species_regional_bad %>%
  select(AcceptedName,RegionalStatesRegulated) %>%
  distinct()

fig2<-merged_modern7 %>%
  filter(NurseryTypeNew=="traditional") %>%
  select(AcceptedName,NurseryName,combo_status,State) %>%
  distinct() %>%
  group_by(State) %>%
  mutate(totalNurseriesState=n_distinct(paste(NurseryName,State)))

species_regional_bad2 <- species_regional_bad %>%
  filter(!is.na(RegionalStatesRegulated))

fig2<- fig2 %>%
  filter(combo_status=="introduced"|combo_status=="established"|combo_status=="invasive") %>%
  filter(AcceptedName %in% species_regional_bad2$AcceptedName) %>% #filter to only non-native regionally regulated species
  ungroup() %>%
  group_by(State) %>%
  mutate(nNurseriesSellingRegSppPerState=n_distinct(paste(NurseryName,State))) 

fig2_distinct<- fig2 %>%
  select(State,totalNurseriesState,nNurseriesSellingRegSppPerState) %>%
  distinct()

fig2_distinct$propNurseries_SellingRegSpecies<-fig2_distinct$nNurseriesSellingRegSppPerState/fig2_distinct$totalNurseriesState #state colors

#How many regulated species are sold at each nursery
fig2_stat<-fig2 %>%
  ungroup() %>%
  group_by(NurseryName,State) %>%
  summarize(n_species_sold=n_distinct(AcceptedName))
fig2_stat_mean<-fig2_stat %>%
  group_by(State)%>%
  mutate(meanSppSoldPerState=mean(n_species_sold)) %>% #mean
  mutate(minSppSoldPerState=min(n_species_sold)) %>% #minimum
  mutate(maxSppSoldPerState=max(n_species_sold)) %>% #maximum
  mutate(medSppSoldPerState=median(n_species_sold)) %>% #median
  select(State,meanSppSoldPerState,minSppSoldPerState,maxSppSoldPerState,medSppSoldPerState) %>%
  distinct()

#How many nurseries are selling each regulated species across the region
fig2_spp<-fig2 %>%
  ungroup() %>%
  select(AcceptedName,NurseryName,State)%>%
  distinct() %>%
  group_by(AcceptedName) %>%
  summarize(nNurseriesPerSpp=n_distinct(paste(NurseryName,State)))
#Append EICAT scores
#fig2_spp<-left_join(fig2_spp,eicats,by="AcceptedName")

fig2_export<-left_join(fig2_distinct,fig2_stat_mean,by="State")
write.csv(fig2_export,"fig2_export.csv",row.names=FALSE) #used to create Fig. S1 in ArcGIS Pro

#Figure 3---- 
#NMDS ordination plot (source: https://rpubs.com/CPEL/NMDS)
library(vegan)
library(tidyr)
library(tibble)
nmds.data<-merged_modern7 %>%
  select(AcceptedName,NurseryName,State,NurseryTypeNew,total_spp,combo_status) %>%
  filter(combo_status=="RegionalNative") %>%
  distinct()
nmds.data <- nmds.data %>%
  filter(total_spp>=15)

nmds_wide <- nmds.data %>%
  group_by(NurseryName, AcceptedName) %>%
  summarise(count = n(), .groups = 'drop') %>%
  pivot_wider(names_from = AcceptedName, values_from = count, values_fill = list(count = 0)) %>%
  column_to_rownames(var = "NurseryName")
dist<-vegdist(nmds_wide,method="bray")
dist.matrix<-as.matrix(dist,labels=TRUE)
set.seed(333)
nmds<-metaMDS(dist.matrix,distance="bray",k=3,maxit=999,trymax=500)  
scores <- as.data.frame(scores(nmds))
# Add NurseryName for merging
scores$NurseryName <- rownames(scores)
# Merge with original data to get NurseryType
scores <- scores %>%
  left_join(nmds.data %>% select(NurseryName, State, NurseryTypeNew) %>% distinct(), by = "NurseryName")
#data for hulls
grp.nat <- scores[scores$NurseryTypeNew == "native", ][chull(scores[scores$NurseryTypeNew == 
                                                                      "native", c("NMDS1", "NMDS2")]), ]  # hull values for grp A
grp.trad <- scores[scores$NurseryTypeNew == "traditional", ][chull(scores[scores$NurseryTypeNew == 
                                                                            "traditional", c("NMDS1", "NMDS2")]), ]  # hull values for grp B
grp.trad$NurseryTypeNew[grp.trad$NurseryTypeNew=="traditional"]<-"conventional"
scores$NurseryTypeNew[scores$NurseryTypeNew=="traditional"]<-"conventional"

hull.data <- rbind(grp.nat, grp.trad)
Fig3A<-ggplot(scores, aes(x = NMDS1, y = NMDS2, color = NurseryTypeNew, shape = NurseryTypeNew)) +
  geom_point(size = 1.5) +
  #geom_polygon(data = hull.data, aes(x = NMDS1, y = NMDS2, fill = NurseryTypeNew), alpha = 0.2) +
  #stat_ellipse() +
  scale_color_manual(values = c("native" = "#0057A9", "conventional" = "#a90000"),
                     labels = c("native" = "Native Specialty", "conventional" = "Conventional"),
                     name=NULL) +
  scale_shape_manual(values = c("native" = 17, "conventional" = 16),
                     labels = c("native" = "Native Specialty", "conventional" = "Conventional"),
                     name=NULL) + # 17 = triangle, 16 = circle
  theme_classic() +
  labs(
    x = "NMDS1",
    y = "NMDS2",
    color = "Nursery Type",
    shape = "Nursery Type"
  )+
  theme(legend.position = "bottom",
        legend.direction="horizontal",
        legend.spacing.x= unit(0.25,"pt"),
        legend.text=element_text(margin=margin(l=0,r=0,t=0,b=0)),
        legend.box.margin=margin(-10,-10,-10,-10))

#Native vs Traditional nurseries plot
NatTrad<-merged_modern7 %>%
  filter(combo_status=="RegionalNative") %>%
  group_by(AcceptedName,combo_status,NurseryTypeNew,Habit) %>%
  summarize(n=n_distinct(paste0(NurseryName,State)))
NatTrad_pivot <- NatTrad %>%
  pivot_wider(names_from = NurseryTypeNew, values_from = n, values_fill = 0) %>%
  ungroup()

Tol_bright <- c('#CCBB44','#EE6677', '#4477AA', '#228833', '#66CCEE')

Fig3B<-
  ggplot(NatTrad_pivot, aes(x = native, y = traditional, color = Habit)) +
  geom_point(size=1.5,alpha=0.8,shape=16) +
  #geom_text(aes(label = AcceptedName), size = 3) +
  labs(
    x = "Number of Native Specialty Nurseries",
    y = "Number of Conventional Nurseries",
    color = "Habit"  # Add legend title
  ) +
  scale_color_manual(values = Tol_bright, name=NULL) +  # Apply custom colors
  #scale_shape_manual(values = c(16, 17, 3, 15, 8)) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray32", linewidth = 1) +
  theme_classic()+
  theme(legend.position = "bottom",
        legend.direction="horizontal",
        legend.spacing.x= unit(0.2,"pt"),
        legend.text=element_text(margin=margin(l=-5,r=-5,t=0,b=0)),
        legend.box.margin=margin(-10,0,-10,-10))
#legend.background=element_rect(color="black"),
#legend.justification.inside=c(0.97,0.95))
#guides(color=guide_legend(nrow=2))
library(cowplot)

png("Figure3.png", width = 176, height = 88, units="mm",res=600)
plot_grid(Fig3A,Fig3B,ncol=2,align="v")
dev.off()

#Nonnative Supplemental Table (Sup Table 2)----
NNSupTable<-merged_modern7 %>%
  filter(combo_status=="introduced"|combo_status=="established"|combo_status=="invasive") %>%
  group_by(AcceptedName) %>%
  mutate(n_ConvNurseries=n_distinct(paste(NurseryName[NurseryTypeNew == "traditional"],State[NurseryTypeNew == "traditional"]))) %>% #number of conventional nurseries each species is sold at
  mutate(n_NatNurseries=n_distinct(paste(NurseryName[NurseryTypeNew == "native"],State[NurseryTypeNew == "native"]))) %>% #number of native specialty nurseries each species is sold at
  ungroup()
NNSupTable<-NNSupTable %>%
  select(AcceptedName,combo_status,GlobalStatus,n_ConvNurseries,n_NatNurseries,StatesSold,StatesRegulated) %>%
  distinct()
#add if sold in region where regulated
regulation_states <- c("CT", "DE", "KY", "MA", "MD", "ME", "NH", "NJ", "NY", "OH", "PA", "RI", "VA", "VT", "WV")
NNSupTable <- NNSupTable %>%
  mutate(SoldInRegionRegulated = if_else(
    !is.na(StatesRegulated) & str_detect(StatesRegulated, str_c(regulation_states, collapse = "|")),
    "Yes", "No"
  ))
#add if sold in state where regulated
NNSupTable <- NNSupTable %>%
  mutate(SoldInStateRegulated = ifelse(
    mapply(function(sold, regulated) {
      any(strsplit(sold, ",\\s*")[[1]] %in% strsplit(regulated, ",\\s*")[[1]])
    }, StatesSold, StatesRegulated), 
    "Yes", 
    NA))
NNSupTable$SoldInRegionRegulated[is.na(NNSupTable$SoldInRegionRegulated)]<-"No"
NNSupTable$SoldInStateRegulated[is.na(NNSupTable$SoldInStateRegulated)]<-"No"
NNSupTable <- NNSupTable[order(-NNSupTable$n_ConvNurseries), ]
#update those identified as invasive or established in the US in global column
NNSupTable$GlobalStatus <- ifelse(NNSupTable$combo_status == "invasive", "invasive", NNSupTable$GlobalStatus)
NNSupTable$GlobalStatus[NNSupTable$combo_status == "established" & 
                          (NNSupTable$GlobalStatus != "invasive" | is.na(NNSupTable$GlobalStatus))] <- "established"
#add if sold in conventional, native specialty, or both
native_spp<-unique(merged_modern7$AcceptedName[merged_modern7$NurseryTypeNew=="native"])
conventional_spp<-unique(merged_modern7$AcceptedName[merged_modern7$NurseryTypeNew=="traditional"])
NNSupTable <- NNSupTable %>%
  mutate(
    NurseryTypeSold = case_when(
      AcceptedName %in% native_spp & AcceptedName %in% conventional_spp ~ "both",
      AcceptedName %in% native_spp ~ "native specialty",
      AcceptedName %in% conventional_spp ~ "conventional",
      TRUE ~ NA_character_
    )
  )
colnames(NNSupTable)[2]<-"L48Status"

#write.csv(NNSupTable,"NonNativesSupTable_Sep9.csv",row.names=FALSE)

#Native Supplemental Table (Sup Table 3)----
SupNativeTable<-merged_modern7 %>%
  filter(combo_status=="RegionalNative"|combo_status=="OutsideNative") %>%
  group_by(AcceptedName) %>%
  mutate(n_nurseries=n_distinct(paste(NurseryName,State))) %>% #number of nurseries each species is sold at
  ungroup()
SupNativeTable<-SupNativeTable %>%
  select(AcceptedName,Habit,combo_status,n_nurseries,StatesSold,NativeRange,StatesRegulated) %>%
  distinct()


NatAnalysis<-merged_modern7 %>% #rank species by which are most sold in native nurseries and not traditional
  select(AcceptedName,NurseryName,NurseryTypeNew,State,combo_status) %>%
  distinct()
proportions <- NatAnalysis %>%
  group_by(AcceptedName, NurseryTypeNew) %>%
  summarise(n_nurseries = n_distinct(NurseryName), .groups = 'drop')
proportions$tot_nurseries<-NA
proportions$tot_nurseries[proportions$NurseryTypeNew=="traditional"]<-length(unique(paste(NatAnalysis$NurseryName,NatAnalysis$State)[NatAnalysis$NurseryTypeNew=="traditional"]))
proportions$tot_nurseries[proportions$NurseryTypeNew=="native"]<-length(unique(paste(NatAnalysis$NurseryName,NatAnalysis$State)[NatAnalysis$NurseryTypeNew=="native"]))
proportions<-proportions %>% 
  mutate(proportion = n_nurseries/tot_nurseries)
proportions<-proportions[-c(3,4)]
proportions_wide <- proportions %>%
  pivot_wider(
    names_from = NurseryTypeNew, 
    values_from = proportion,
    values_fill = list(proportion = 0)
  )    
#append to native table
SupNativeTable<-left_join(SupNativeTable,proportions_wide,by="AcceptedName")
colnames(SupNativeTable)[c(8,9)]<-c("P_conventional","P_native")
# Create a new column to store the ranking metric
SupNativeTable$RankMetric <- SupNativeTable$P_native - SupNativeTable$P_conventional
# Rank the species based on the RankMetric, from highest to lowest
SupNativeTable <- SupNativeTable[order(-SupNativeTable$RankMetric), ]
SupNativeTable$Rank<-seq(1:length(SupNativeTable$AcceptedName)) 
SupNativeTable<-SupNativeTable[-10]
SupNativeTable$combo_status[SupNativeTable$combo_status=="RegionalNative"]<-"EFNative"
SupNativeTable$combo_status[SupNativeTable$combo_status=="OutsideNative"]<-"EFNonNative"
colnames(SupNativeTable)[3]<-"NativeStatus"

write.csv(SupNativeTable,"SupNativeTable_Sep9.csv",row.names=FALSE)

#Supplemental Figure 2----
nurserytype<-merged_modern7 %>%
  select(AcceptedName,NurseryName,State,combo_status) %>%
  distinct() %>%
  group_by(NurseryName,State,combo_status) %>%
  summarize(n_combostatus=n()) 
all_categories <- c("RegionalNative", "OutsideNative","established", "introduced", "invasive")
complete_data <- nurserytype %>%
  select(NurseryName) %>%
  distinct() %>%
  crossing(combo_status = all_categories)
expanded_data <- complete_data %>%
  left_join(nurserytype, by = c("NurseryName", "State", "combo_status")) %>%
  replace_na(list(n_combostatus = 0, total_spp = 0)) %>%
  ungroup() %>%
  group_by(NurseryName,State) %>%
  mutate(total_spp=sum(n_combostatus))
expanded_data <- expanded_data %>%
  group_by(State, NurseryName) %>%
  mutate(
    combined_native_sum = sum(n_combostatus[combo_status %in% c("RegionalNative", "OutsideNative")], na.rm = TRUE)
  ) %>%
  ungroup()
pNat<-expanded_data %>%
  select(NurseryName,State,combined_native_sum,total_spp) %>%
  distinct() %>%
  mutate(percNat=combined_native_sum/total_spp)
toappend<-merged_modern7 %>%
  ungroup() %>%
  select(NurseryName,NurseryTypeNew) %>%
  distinct()
pNat<-left_join(pNat,toappend,by="NurseryName")
ggplot(pNat, aes(x = percNat)) +
  geom_histogram(binwidth = 0.05, fill = "grey", color = "black") +
  labs(
    x = "Percentage of Native Species",
    y = "Count"
  ) +
  geom_vline(xintercept=0.8,color="red",linetype="dashed",size=1.5)+
  theme_minimal()

#Cultivar Supplemental Table (Sup Table 4)----
cult_table<-merged_modern7 %>%
  filter(IsCultivar=="yes") %>%
  group_by(AcceptedName,CultivarName) %>%
  mutate(n_nurseries_cult=n_distinct(paste(NurseryName,State))) %>% #number of nurseries each cultivar is sold at
  ungroup()
cult_table<-cult_table %>% select(AcceptedName,combo_status,CultivarName,n_nurseries_cult) %>% distinct()
cult_table$combo_status[cult_table$combo_status=="RegionalNative"]<-"EFNative"
cult_table$combo_status[cult_table$combo_status=="OutsideNative"]<-"EFNonNative"
#write.csv(cult_table,file="SupCultivarTableSep9.csv",row.names=FALSE)