rm(list=ls())
library(tidyverse)
library(readxl)
Data from IBGE
##Load Area Harvested Data
#maize data are by municiaplity for all states (Amunis)
maize_harv_Amunis_Data <- read_excel("ProductionData/maize_brazil.xlsx", sheet = "Harvested area (hectares)", skip = 1, na = c("", "-", "..."))
#maize data are by municiaplity for all states (Amunis)
maize2_harv_Amunis_Data <- read_excel("ProductionData/maize2_brazil.xlsx", sheet = "Harvested area (hectares)", skip = 1, na = c("", "-", "..."), col_types = c("numeric", "guess","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric")) #need to specify for 2010 column
#maize data are by municiaplity for all states (Amunis)
soy_harv_Amunis_Data <- read_excel("ProductionData/soybean_brazil.xlsx", sheet = "Harvested area (hectares)", skip = 1, na = c("", "-", "..."))
Fstate_vals <- c(17, 29, 31, 35, 41, 42, 43, 50, 51, 52)
Fstate_abbrev <- c("TO", "BA", "MG", "SP", "PR", "SC", "RS", "MS", "MT", "GO")
#Manipulate data
#MAIZE
#has the same data strucutre (with some differences in unit conversions - could write function to cover both?)
maize_harv_Amunis <- maize_harv_Amunis_Data %>%
rename(muniID = `IBGE CODE`) %>%
filter(!is.na(muniID)) %>% #safer way to remove text line in muniID
mutate(state = substr(muniID, 1, 2)) %>% #extract the muniID
mutate_at(vars("2001":"2018"), as.numeric) %>% #convert values to numeric
dplyr::select(-Municipality) #drop unwanted columns
maize_harv_Astates <- maize_harv_Amunis %>%
group_by(state) %>%
summarise_all(sum, na.rm=T) %>%
dplyr::select(-muniID) #drop unwanted columns
maize_harv_Astates_long <- maize_harv_Astates %>%
gather(key = year, value = maize_ha, -state) %>%
mutate_at(vars(year), as.integer)
maize_harv_Amunis_long <- maize_harv_Amunis %>%
dplyr::select(-state) %>%
gather(key = year, value = maize_ha, -muniID) %>%
mutate_at(vars(year), as.integer)
#MAIZE2
maize2_harv_Amunis <- maize2_harv_Amunis_Data %>%
rename(muniID = `IBGE CODE`) %>%
filter(!is.na(muniID)) %>% #safer way to remove text line in muniID
mutate(state = substr(muniID, 1, 2)) %>% #extract the muniID
mutate_at(vars("2003":"2018"), as.numeric) %>% #convert values to numeric
dplyr::select(-Municipality) #drop unwanted columns
maize2_harv_Astates <- maize2_harv_Amunis %>%
group_by(state) %>%
summarise_all(sum, na.rm=T) %>%
dplyr::select(-muniID) #drop unwanted columns
maize2_harv_Astates_long <- maize2_harv_Astates %>%
gather(key = year, value = maize2_ha, -state) %>%
mutate_at(vars(year), as.integer)
maize2_harv_Amunis_long <- maize2_harv_Amunis %>%
dplyr::select(-state) %>%
gather(key = year, value = maize2_ha, -muniID) %>%
mutate_at(vars(year), as.integer)
##SOY
soy_harv_Amunis <- soy_harv_Amunis_Data %>%
rename(muniID = `IBGE CODE`) %>%
filter(!is.na(muniID)) %>% #safer way to remove text line in muniID
mutate(state = substr(muniID, 1, 2)) %>% #extract the muniID
mutate_at(vars("2001":"2018"), as.numeric) %>% #convert values to numeric
dplyr::select(-Municipality) #drop unwanted columns
soy_harv_Astates <- soy_harv_Amunis %>%
group_by(state) %>%
summarise_all(sum, na.rm=T) %>%
dplyr::select(-muniID) #drop unwanted columns
soy_harv_Astates_long <- soy_harv_Astates %>%
gather(key = year, value = soy_ha, -state) %>%
mutate_at(vars(year), as.integer)
soy_harv_Amunis_long <- soy_harv_Amunis %>%
dplyr::select(-state) %>%
gather(key = year, value = soy_ha, -muniID) %>%
mutate_at(vars(year), as.integer)
harv_muni_year <- left_join(maize_harv_Amunis_long, maize2_harv_Amunis_long, by = c("year", "muniID"))
harv_muni_year <- left_join(harv_muni_year, soy_harv_Amunis_long, by = c("year", "muniID"))
harv_muni_xxxx <- harv_muni_year %>%
filter(year == 2018) %>%
#filter(simulated == "TRUE") %>%
#dplyr::select(muniID, maize_ha, soy_ha) %>%
mutate(maize2_ha = replace(maize2_ha, is.na(maize2_ha),0)) %>%
mutate(maize_ha = replace(maize_ha, is.na(maize_ha),0)) %>%
mutate(soy_ha = replace(soy_ha, is.na(soy_ha),0)) %>%
mutate(soy_ha = if_else(soy_ha > maize2_ha,soy_ha - maize2_ha, 0)) %>%
mutate(total_ha = rowSums(dplyr::select(., ends_with("_ha")), na.rm=T)) %>%
mutate(maize_prop = round(maize_ha / total_ha,3)) %>%
mutate(soy_prop = round(soy_ha / total_ha,3)) %>%
mutate(cum_ms_prop = rowSums(select(., "maize_prop", "soy_prop"))) %>%
mutate(dc_prop = round(maize2_ha / total_ha,3))
write_csv(harv_muni_xxxx, "muni2018_harvestAreas_newDC.csv")
#create new data frame
harv_state_year <- left_join(maize_harv_Astates_long, maize2_harv_Astates_long, by = c("year", "state"))
harv_state_year <- left_join(harv_state_year, soy_harv_Astates_long, by = c("year", "state"))
#add focal states indicator
harv_state_year <- harv_state_year %>%
mutate(simulated = state %in% Fstate_vals) %>%
#total ha
mutate(total_ha = rowSums(select(., ends_with("_ha")), na.rm=T)) %>%
#calculate double cropping
mutate(suffsoy = if_else(!is.na(maize2_ha), soy_ha >= maize2_ha, F)) %>%
#DC area is twice maize2
mutate(DC_ha = if_else(suffsoy, 2 * maize2_ha,
if_else(!is.na(maize2_ha), 2 * soy_ha, 0))) %>%
#area of soy in DC
mutate(soyDC_ha = if_else(suffsoy, maize2_ha,
if_else(!is.na(maize2_ha), soy_ha, 0))) %>%
#area of soy not in DC
mutate(soyNDC_ha = if_else(suffsoy, soy_ha - maize2_ha,
if_else(!is.na(maize2_ha), 0, soy_ha))) %>%
mutate(m2DC_ha = if_else(suffsoy, maize2_ha,
if_else(!is.na(maize2_ha), soy_ha, 0))) %>%
mutate(m2NDC_ha = if_else(suffsoy, 0,
if_else(!is.na(maize2_ha), maize2_ha - soy_ha, 0))) %>%
#proportion of m2 in DC
mutate(m2DC_prop = if_else(maize2_ha > 0, m2DC_ha / maize2_ha, 0)) %>%
#proportion of soy in DC
mutate(soyDC_prop = if_else(soyDC_ha > 0, soyDC_ha / soy_ha, 0))
harv_state_year_props <- harv_state_year %>%
select(state, year, total_ha, soyDC_prop, m2DC_prop) %>%
mutate(maize_prop = 1)
psy_long <- harv_state_year %>%
select(-total_ha, -soyDC_prop, -m2DC_prop) %>%
gather(key = commodity, value = ha, -state, -year, -simulated, -suffsoy)
psimy_long <- psy_long %>%
group_by(simulated, year, commodity) %>%
summarise(sumsim = sum(ha, na.rm=T))
harv_state_year_props
Filter for 2001 only to get proportion of maize vs soy by state (for model initialisation)
harv_state_2001 <- harv_state_year %>%
filter(year == 2001) %>%
filter(simulated == "TRUE") %>%
select(state, maize_ha, soy_ha) %>%
mutate(total_ha = rowSums(select(., ends_with("_ha")), na.rm=T)) %>%
mutate(maize_prop = maize_ha / total_ha)
harv_state_2001
#plot areas through time
psimy_long_sim <- psimy_long %>%
filter(simulated == "TRUE")
psimy_long_sim %>%
filter(commodity == "DC_ha" | commodity == "maize_ha" | commodity == "soyNDC_ha" | commodity == "m2NDC_ha") %>%
ggplot(aes(x = year, y = sumsim, fill = commodity)) +
geom_bar(stat = "identity", colour="white") +
scale_y_continuous(name = "Harvested Area (ha)", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("IBGE Data")
psimy_long_sim %>%
filter(commodity == "DC_ha" | commodity == "maize_ha" | commodity == "soyNDC_ha" | commodity == "m2NDC_ha") %>%
ggplot(aes(x = year, y = sumsim, fill = commodity)) +
geom_bar(stat = "identity", colour="white", position="fill") +
scale_y_continuous(name = "Harvested Area (ha)", labels = scales::percent_format()) +
#facet_grid(.~state) +
ggtitle("IBGE Data")
psimy_long_sim %>%
ggplot(aes(x = year, y = sumsim, colour=commodity)) +
geom_line(size = 1) +
scale_y_continuous(name = "Harvested Area (ha)", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("IBGE Data")
DC production is maize2 production + proportion of soy production in DC
#maize data are by municiaplity for all states (Amunis)
maize_prod_Amunis_Data <- read_excel("ProductionData/maize_brazil.xlsx", sheet = "Production (tons)", skip = 1, na = c("", "-", "..."))
#maize data are by municiaplity for all states (Amunis)
maize2_prod_Amunis_Data <- read_excel("ProductionData/maize2_brazil.xlsx", sheet = "Production (tons)", skip = 1, na = c("", "-", "..."), col_types = c("numeric", "guess","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric")) #need to specify for 2010 column
#maize data are by municiaplity for all states (Amunis)
soy_prod_Amunis_Data <- read_excel("ProductionData/soybean_brazil.xlsx", sheet = "Production (Tons)", skip = 1, na = c("", "-", "..."))
#Manipulate data
#MAIZE
#tonss the same data strucutre (with some differences in unit conversions - could write function to cover both?)
maize_prod_Amunis <- maize_prod_Amunis_Data %>%
rename(muniID = `IBGE CODE`) %>%
filter(!is.na(muniID)) %>% #safer way to remove text line in muniID
mutate(state = substr(muniID, 1, 2)) %>% #extract the muniID
mutate_at(vars("2001":"2018"), as.numeric) %>% #convert values to numeric
dplyr::select(-Municipality) #drop unwanted columns
maize_prod_Astates <- maize_prod_Amunis %>%
group_by(state) %>%
summarise_all(sum, na.rm=T) %>%
dplyr::select(-muniID) #drop unwanted columns
maize_prod_Astates_long <- maize_prod_Astates %>%
gather(key = year, value = maize_tons, -state) %>%
mutate_at(vars(year), as.integer)
#MAIZE2
maize2_prod_Amunis <- maize2_prod_Amunis_Data %>%
rename(muniID = `IBGE CODE`) %>%
filter(!is.na(muniID)) %>% #safer way to remove text line in muniID
mutate(state = substr(muniID, 1, 2)) %>% #extract the muniID
mutate_at(vars("2003":"2015"), as.numeric) %>% #convert values to numeric
dplyr::select(-Municipality) #drop unwanted columns
maize2_prod_Astates <- maize2_prod_Amunis %>%
group_by(state) %>%
summarise_all(sum, na.rm=T) %>%
dplyr::select(-muniID) #drop unwanted columns
maize2_prod_Astates_long <- maize2_prod_Astates %>%
gather(key = year, value = maize2_tons, -state) %>%
mutate_at(vars(year), as.integer)
##SOY
soy_prod_Amunis <- soy_prod_Amunis_Data %>%
rename(muniID = `IBGE CODE`) %>%
filter(!is.na(muniID)) %>% #safer way to remove text line in muniID
mutate(state = substr(muniID, 1, 2)) %>% #extract the muniID
mutate_at(vars("2001":"2018"), as.numeric) %>% #convert values to numeric
dplyr::select(-Municipality) #drop unwanted columns
soy_prod_Astates <- soy_prod_Amunis %>%
group_by(state) %>%
summarise_all(sum, na.rm=T) %>%
dplyr::select(-muniID) #drop unwanted columns
soy_prod_Astates_long <- soy_prod_Astates %>%
gather(key = year, value = soy_tons, -state) %>%
mutate_at(vars(year), as.integer)
#create new data frame
prod_state_year <- left_join(maize_prod_Astates_long, maize2_prod_Astates_long, by = c("year", "state"))
prod_state_year <- left_join(prod_state_year, soy_prod_Astates_long, by = c("year", "state"))
prod_harv <- left_join(prod_state_year, harv_state_year_props, by = c("year", "state"))
Data from IBGE
##Load Area Planted Data
#maize data are by municiaplity for all states (Amunis)
maize_plant_Amunis_Data <- read_excel("ProductionData/maize_brazil.xlsx", sheet = "Planted area (hectares)", skip = 1, na = c("", "-", "..."))
#maize data are by municiaplity for all states (Amunis)
maize2_plant_Amunis_Data <- read_excel("ProductionData/maize2_brazil.xlsx", sheet = "Planted area (hectares)", skip = 1, na = c("", "-", "..."), col_types = c("numeric", "guess","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric")) #need to specify for 2010 column
#maize data are by municiaplity for all states (Amunis)
soy_plant_Amunis_Data <- read_excel("ProductionData/soybean_brazil.xlsx", sheet = "Planted area (hectares)", skip = 1, na = c("", "-", "..."))
Fstate_vals <- c(17, 29, 31, 35, 41, 42, 43, 50, 51, 52)
Fstate_abbrev <- c("TO", "BA", "MG", "SP", "PR", "SC", "RS", "MS", "MT", "GO")
#Manipulate data
#MAIZE
#has the same data strucutre (with some differences in unit conversions - could write function to cover both?)
maize_plant_Amunis <- maize_plant_Amunis_Data %>%
rename(muniID = `IBGE CODE`) %>%
filter(!is.na(muniID)) %>% #safer way to remove text line in muniID
mutate(state = substr(muniID, 1, 2)) %>% #extract the muniID
mutate_at(vars("2001":"2018"), as.numeric) %>% #convert values to numeric
dplyr::select(-Municipality) #drop unwanted columns
maize_plant_Astates <- maize_plant_Amunis %>%
group_by(state) %>%
summarise_all(sum, na.rm=T) %>%
dplyr::select(-muniID) #drop unwanted columns
maize_plant_Astates_long <- maize_plant_Astates %>%
gather(key = year, value = maize_ha, -state) %>%
mutate_at(vars(year), as.integer)
maize_plant_Amunis_long <- maize_plant_Amunis %>%
dplyr::select(-state) %>%
gather(key = year, value = maize_ha, -muniID) %>%
mutate_at(vars(year), as.integer)
#MAIZE2
maize2_plant_Amunis <- maize2_plant_Amunis_Data %>%
rename(muniID = `IBGE CODE`) %>%
filter(!is.na(muniID)) %>% #safer way to remove text line in muniID
mutate(state = substr(muniID, 1, 2)) %>% #extract the muniID
mutate_at(vars("2003":"2018"), as.numeric) %>% #convert values to numeric
dplyr::select(-Municipality) #drop unwanted columns
maize2_plant_Astates <- maize2_plant_Amunis %>%
group_by(state) %>%
summarise_all(sum, na.rm=T) %>%
dplyr::select(-muniID) #drop unwanted columns
maize2_plant_Astates_long <- maize2_plant_Astates %>%
gather(key = year, value = maize2_ha, -state) %>%
mutate_at(vars(year), as.integer)
maize2_plant_Amunis_long <- maize2_plant_Amunis %>%
dplyr::select(-state) %>%
gather(key = year, value = maize2_ha, -muniID) %>%
mutate_at(vars(year), as.integer)
##SOY
soy_plant_Amunis <- soy_plant_Amunis_Data %>%
rename(muniID = `IBGE CODE`) %>%
filter(!is.na(muniID)) %>% #safer way to remove text line in muniID
mutate(state = substr(muniID, 1, 2)) %>% #extract the muniID
mutate_at(vars("2001":"2018"), as.numeric) %>% #convert values to numeric
dplyr::select(-Municipality) #drop unwanted columns
soy_plant_Astates <- soy_plant_Amunis %>%
group_by(state) %>%
summarise_all(sum, na.rm=T) %>%
dplyr::select(-muniID) #drop unwanted columns
soy_plant_Astates_long <- soy_plant_Astates %>%
gather(key = year, value = soy_ha, -state) %>%
mutate_at(vars(year), as.integer)
soy_plant_Amunis_long <- soy_plant_Amunis %>%
dplyr::select(-state) %>%
gather(key = year, value = soy_ha, -muniID) %>%
mutate_at(vars(year), as.integer)
plant_muni_year <- left_join(maize_plant_Amunis_long, maize2_plant_Amunis_long, by = c("year", "muniID"))
plant_muni_year <- left_join(plant_muni_year, soy_plant_Amunis_long, by = c("year", "muniID"))
plant_muni_xxxx <- plant_muni_year %>%
filter(year == 2018) %>%
#filter(simulated == "TRUE") %>%
#dplyr::select(muniID, maize_ha, soy_ha) %>%
mutate(maize2_ha = replace(maize2_ha, is.na(maize2_ha),0)) %>%
mutate(maize_ha = replace(maize_ha, is.na(maize_ha),0)) %>%
mutate(soy_ha = replace(soy_ha, is.na(soy_ha),0)) %>%
mutate(soy_ha = if_else(soy_ha > maize2_ha,soy_ha - maize2_ha, 0)) %>%
mutate(total_ha = rowSums(dplyr::select(., ends_with("_ha")), na.rm=T)) %>%
mutate(maize_prop = round(maize_ha / total_ha,3)) %>%
mutate(soy_prop = round(soy_ha / total_ha,3)) %>%
mutate(cum_ms_prop = rowSums(select(., "maize_prop", "soy_prop"))) %>%
mutate(dc_prop = round(maize2_ha / total_ha,3))
write_csv(plant_muni_xxxx, "muni2018_plantedAreas_newDC.csv")