library(raster)
library(maptools)
library(fossil)
library(rnoaa)
library(stringr)
library(UScensus2010)
library(rgdal)
library(rgeos)

rasterOptions(tmpdir = 'path-to-raster-directory')

### Combined data file for counties
county.dat = NULL

### STANDARDIZE COUNTY NAMES AND FIPS CODES
fipscodes$state_county = paste(tolower(fipscodes$state),tolower(fipscodes$county),sep='_')
fipscodes$FIPS = as.character(paste("X",str_pad(fipscodes$fips_state, 2, pad = "0"),str_pad(fipscodes$fips_county, 3, pad = "0"),sep=''))
fipscodes$state_county = gsub('_st ','_saint ',fipscodes$state_county)

  # Add new county in South Dakota (Shannon county [46113] was changed to Oglala Lakota County [46102] in 2015)
  fipscodes = rbind(fipscodes,c('South Dakota','Oglala Lakota',46,102,46102,'south dakota_oglala Lakota','X46102')) 
  # Add Montgomery County,AR, which was not in the dataset
  fipscodes = rbind(fipscodes,c('Arkansas','Montgomery',5,97,597,'montgomery_arkansas','X05097'))

### DOWNLOAD AND EDIT US COUNTY MAP AND DATA
#counties = getData(name="GADM", country="USA", level=2)
#us48 = counties[!counties$NAME_1 %in% c('Alaska','Hawaii'), ]
#us48$NAME_1 = tolower(us48$NAME_1)
#us48$NAME_2 = tolower(us48$NAME_2)
#us48$state_county = paste(us48$NAME_1,us48$NAME_2,sep="_") # combines state and county name to get a unique identifier

### LOAD EXISTING US COUNTY MAP AND DATA AND EDIT
counties = readShapeSpatial("/Users/charlesgwillis/Google Drive/Research/general data/North America Shape Files/CensusBureau/cb_2015_us_county_20m/cb_2015_us_county_20m.shp")
counties$FIPS = paste('X',counties$STATEFP,counties$COUNTYFP,sep='')
# Sets projection to EPSG:4326
projection(counties) = '+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0'
us48 = counties[!counties$STATEFP %in% c('02','15','60','66','69','72','74','78'), ] # Remove all the 48 continential states

##### 
# PLOT COUNTY NAMES BY STATE
##### 
  # ARKANSAS = 05, South Dakota = 46
  stt = us48[us48$STATEFP=='46',]
  centroids = as.data.frame(getSpPPolygonsLabptSlots(stt))
  names(centroids) = c('lon','lat')
  centroids$FIPS = stt$FIPS
  plot(stt)
  text(centroids$lon, centroids$lat, labels=stt$COUNTYFP, col="blue", cex=.7)

#us48 = us48[us48$FIPS %in% c('X05097','X46102'),]
  

######
### EXTRACT BIO CLIMATE DATA
#####
bioclim.var = list.files('/Users/charlesgwillis/Google Drive/North American niche modeling project/data/climate/bioclim/bioclim_ESPG4326',
                        pattern = 'current')
bioclim.dir = list.files('/Users/charlesgwillis/Google Drive/North American niche modeling project/data/climate/bioclim/bioclim_ESPG4326',
                          pattern = 'current',full.names = T)
bioclim.names = gsub('clim.current.','',bioclim.var)
bioclim.names = gsub('.tif','',bioclim.names)
# Loop to extract climate data for all 19 biovariables
bioclim.dat = NULL
for(i in 1:length(bioclim.dir)){
  a = proc.time()[[3]]
  print(i)
  ras = raster(bioclim.dir[[i]])
  ext = extract(ras,us48)
  mn = unlist(lapply(ext, function(x) if (!is.null(x)) mean(x, na.rm=TRUE) else NA ))
  vr = unlist(lapply(ext, function(x) if (!is.null(x)) var(x, na.rm=TRUE) else NA ))
  sd = unlist(lapply(ext, function(x) if (!is.null(x)) sd(x, na.rm=TRUE) else NA ))
  mi = unlist(lapply(ext, function(x) if (!is.null(x)) min(x, na.rm=TRUE) else NA ))
  ma = unlist(lapply(ext, function(x) if (!is.null(x)) max(x, na.rm=TRUE) else NA ))
  md = unlist(lapply(ext, function(x) if (!is.null(x)) median(x, na.rm=TRUE) else NA ))
  ln = unlist(lapply(ext, function(x) if (!is.null(x)) length(x) else NA ))
  
  out = as.data.frame(cbind(mn,vr,sd,mi,ma,md,ln))
  out$FIPS = us48$FIPS
  bioclim.dat[[i]] = out
  print(proc.time()[[3]]-a)
}
names(bioclim.dat) = bioclim.names

######
### EXTRACT AG CLIMATE DATA
######
agclim.var = list.files('/Users/charlesgwillis/Google Drive/North American niche modeling project/data/climate/agclim/agclim_ESPG4326',
                         pattern = 'current')
agclim.dir = list.files('/Users/charlesgwillis/Google Drive/North American niche modeling project/data/climate/agclim/agclim_ESPG4326',
                         pattern = 'current',full.names = T)
agclim.names = gsub('agclim.current.','',agclim.var)
agclim.names = gsub('.tif','',agclim.names)
# Loop to extract climate data for all 5 biovariables
agclim.dat = NULL
for(i in 1:length(agclim.dir)){
  a = proc.time()[[3]]
  print(i)
  ras = raster(agclim.dir[[i]])
  ext = extract(ras,us48)
  mn = unlist(lapply(ext, function(x) if (!is.null(x)) mean(x, na.rm=TRUE) else NA ))
  vr = unlist(lapply(ext, function(x) if (!is.null(x)) var(x, na.rm=TRUE) else NA ))
  sd = unlist(lapply(ext, function(x) if (!is.null(x)) sd(x, na.rm=TRUE) else NA ))
  mi = unlist(lapply(ext, function(x) if (!is.null(x)) min(x, na.rm=TRUE) else NA ))
  ma = unlist(lapply(ext, function(x) if (!is.null(x)) max(x, na.rm=TRUE) else NA ))
  md = unlist(lapply(ext, function(x) if (!is.null(x)) median(x, na.rm=TRUE) else NA ))
  ln = unlist(lapply(ext, function(x) if (!is.null(x)) length(x) else NA ))
  
  out = as.data.frame(cbind(mn,vr,sd,mi,ma,md,ln))
  out$FIPS = us48$FIPS
  agclim.dat[[i]] = out
  print(proc.time()[[3]]-a)
}
names(agclim.dat) = agclim.names

######
### EXTRACT ENVIRONMENTAL DATA (HWSD and Elevation)
######
environ.var = list.files('/Users/charlesgwillis/Google Drive/North American niche modeling project/data/environment/env_ESPG4326/')
environ.dir = list.files('/Users/charlesgwillis/Google Drive/North American niche modeling project/data/environment/env_ESPG4326/',
                      ,full.names = T)
environ.names = gsub('env.','',environ.var)
environ.names = gsub('.tif','',environ.names)

# Loop to extract climate data for all 11 environmental layers
environ.dat = NULL
for(i in 1:length(environ.dir)){
  a = proc.time()[[3]]
  print(i)
  ras = raster(environ.dir[[i]])
  ext = extract(ras,us48)
  mn = unlist(lapply(ext, function(x) if (!is.null(x)) mean(x, na.rm=TRUE) else NA ))
  vr = unlist(lapply(ext, function(x) if (!is.null(x)) var(x, na.rm=TRUE) else NA ))
  sd = unlist(lapply(ext, function(x) if (!is.null(x)) sd(x, na.rm=TRUE) else NA ))
  mi = unlist(lapply(ext, function(x) if (!is.null(x)) min(x, na.rm=TRUE) else NA ))
  ma = unlist(lapply(ext, function(x) if (!is.null(x)) max(x, na.rm=TRUE) else NA ))
  md = unlist(lapply(ext, function(x) if (!is.null(x)) median(x, na.rm=TRUE) else NA ))
  ln = unlist(lapply(ext, function(x) if (!is.null(x)) length(x) else NA ))
  
  out = as.data.frame(cbind(mn,vr,sd,mi,ma,md,ln))
  names(out) = paste(environ.names[[i]],c('mn','vr','sd','mi','ma','md','ln'),sep='.')
  out$FIPS = us48$FIPS
  environ.dat[[i]] = out
  print(proc.time()[[3]]-a)
}
names(environ.dat) = environ.names[1:11]

######
### Merge County data and fips code data
######
county.dat = fipscodes
county.dat = county.dat[county.dat$FIPS %in% us48$FIPS,]
us48 = us48[us48$FIPS %in% county.dat$FIPS,]

######
### Find county centroid
######
centroids = as.data.frame(getSpPPolygonsLabptSlots(us48))
names(centroids) = c('lon','lat')
centroids$FIPS = us48$FIPS

######
### Calculate County Area in sq. km
# Transform to EPSG2163, US National Atlas Equal Area
######
us48.epsg2163 = spTransform(us48, CRS("+proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +a=6370997 +b=6370997 +units=m +no_defs"))
us48.area = areaPoly(sp.object=us48.epsg2163)/1000000
us48.area = as.data.frame(us48.area)
us48.area$FIPS = us48.epsg2163$FIPS

### Load County Census Data
# "DataSet.txt". State & County QuickFacts. U.S. Census Bureau. Archived from the original on September 30, 2013. Retrieved November 16, 2012.
# POP010210 is the 2010 census estimate
us.census = read.csv('/Users/charlesgwillis/Dropbox/Work/projects/US County Phylodiversity/data/County Data/Census County Data 2012.csv')
us.census$FIPS = as.character(paste("X",str_pad(us.census$fips, 5, pad = "0"),sep=''))
us.census$FIPS[us.census$FIPS == 'X46113'] = 'X46102' #Change county in South Dakota (Shannon county [46113] was changed to Oglala Lakota County [46102] in 2015)
us.census = us.census[,c('FIPS','POP010210')] #reduce data frame

### Combine county level data for US48
# Loops to rename all statistics with variable specific names
for(i in 1:length(environ.dat)){
  names(environ.dat[[i]]) = c(paste(environ.names[[i]],c('mn','vr','sd','mi','ma','md','ln'),sep='.'),'FIPS')
}
for(i in 1:length(bioclim.dat)){
  names(bioclim.dat[[i]]) = c(paste(bioclim.names[[i]],c('mn','vr','sd','mi','ma','md','ln'),sep='.'),'FIPS')
}
for(i in 1:length(agclim.dat)){
  names(agclim.dat[[i]]) = c(paste(agclim.names[[i]],c('mn','vr','sd','mi','ma','md','ln'),sep='.'),'FIPS')
}

# Concatenate county dataset
all.dat = c(environ.dat, bioclim.dat, agclim.dat)
all.dat = all.dat[-c(32,33)] # remove repeated bioclim variables from the agclim dataset 'bio06' and 'bio12'

conc.dat = all.dat[[1]]
for(i in 2:length(all.dat)){
  conc.dat = merge(conc.dat,all.dat[[i]],by='FIPS')
}

# Merge County area data with conc.data
conc.dat = merge(conc.dat,us48.area,by = 'FIPS')

# Merge County Census data with conc.data
conc.dat = merge(conc.dat,us.census,by = 'FIPS')

# Export county data
write.csv(conc.dat,file = '/Users/charlesgwillis/Dropbox/Work/projects/US County Phylodiversity/data/County Data/county.environmental.data.04202017.csv',quote = FALSE,row.names = F)

### PLOT County data in grey scale for gravel
x = merge(us48.epsg2163,conc.dat,by='FIPS')
plot(x,col=grey(x$POP010210/max(x$POP010210)))


