rm(list=ls())
library(tidyverse)
library(readxl)

Using Summary Table data for observed Land Cover (PastureB classification)

yrs <- seq(2001, 2018, 1)

#load the region file (used to match each cell to a municipality)
region <- read.csv("SummaryTables/region2001_noDC_HD_2019-10-28.csv")

#for debugging
#i <- 1


#loop through all years
for(i in seq_along(yrs)){
  
  #if this is the first year, create lc_dat
  if(i == 1){
    lc_dat <- read_csv(paste0("SummaryTables/SummaryTable",yrs[i],"_PastureB_Disagg.csv"))   #load empirical map summary data (created using summarise_LCmaps.r)
    lc_dat <- lc_dat %>%
      mutate(year = yrs[i]) 
  }
  
  #if this is not the first year, bind new year data to lc_dat
  else {
    
    lc <- read_csv(paste0("SummaryTables/SummaryTable",yrs[i],"_PastureB_Disagg.csv"))   #load empirical map summary data (created using summarise_LCmaps.r)
    lc <- lc %>%
      mutate(year = yrs[i])
    
    lc_dat <- bind_rows(lc_dat, lc)
  }
  
}  

#add state ID
lc_dat <- lc_dat %>%
  mutate(state = (muniID %/% 100000)) %>%
  mutate(state = if_else(state == 17, "TO", 
      if_else(state == 29, "BA",
      if_else(state == 31, "MG",
      if_else(state == 35, "SP",
      if_else(state == 41, "PR",
      if_else(state == 42, "SC",
      if_else(state == 43, "RS", 
      if_else(state == 50, "MS",
      if_else(state == 51, "MT",
      if_else(state == 52, "GO", "NA"
      ))))))))))
    )

#LC1 = Nature
#LC2 = Other Agri
#LC3 = Agri
#LC4 = Other
#LC5 = Pasture

#add observed cell count columns  
lc_dat <- lc_dat %>%
      mutate(Nature = round(LC1 * NonNAs,0)) %>%
      mutate(OAgri = round(LC2 * NonNAs,0)) %>%
      mutate(Agri = round(LC3 * NonNAs,0)) %>%
      mutate(Other = round(LC4 * NonNAs,0)) %>%
      mutate(Pasture = round(LC5 * NonNAs,0))

#make data long
lc_cells_long <- lc_dat %>%
  select(year:Pasture) %>%
  gather(key = LC, value = cells, -year, -state)

write_csv(lc_cells_long, "lc_cells_long.csv") 

#calculate state totals
lc_cells_long_states <- lc_cells_long %>% 
  group_by(state, year,LC) %>%
  summarise_at(vars(matches("cells")),sum, na.rm=T)

write_csv(lc_cells_long_states, "lc_cells_long_states2.csv") 

Plot observed land cover by state through time

lc_cells_long_states %>% 
  ggplot(aes(x = year, y = cells, fill = LC)) + 
  geom_bar(stat = "identity") +
  scale_y_continuous(name = "Cells", labels = scales::comma) +
  facet_grid(.~state) +
  ggtitle("Ten States Observed Cells") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

Plot observed land cover for all 10 simulation states through time

lc_cells_long_brazil <- lc_cells_long_states %>% 
  group_by(year,LC) %>%
  summarise_at(vars(matches("cells")),sum)

#lc_cells_long_brazil %>% 
#  spread(key=LC, value=cells) %>%
#  write_csv("SummaryTables_cells.csv")

lc_cells_long_brazil %>% 
  ggplot(aes(x = year, y = cells, fill = LC)) + 
  geom_bar(stat = "identity", colour="white") +
  scale_y_continuous(name = "Cells", labels = scales::comma) +
  #facet_grid(.~state) +
  ggtitle("Ten States Observed Cells")

lc_cells_long_brazil %>% 
  ggplot(aes(x = year, y = cells, colour = LC)) + 
  geom_line(size = 1) +
  scale_y_continuous(name = "Cells", labels = scales::comma) +
  #facet_grid(.~state) +
  ggtitle("Ten States Observed Cells")

Use production data from IBGE

##Load Production Data
meat_prod_Astates_Data <- read_excel("ProductionData/Cattle_meat_production_Kg_2000_2018_all_states.xlsx", sheet = "Plan1", skip = 1)  #data for all states Astates

#dairy data are by municiaplity for all states (Amunis)
#dairy_prod_Amunis_Data <- read_excel("ProductionData/dairy_Municipalities_Brazil.xlsx", sheet = "Tabela", skip = 1, na = c("", "-", "..."))

#maize data are by municiaplity for all states (Amunis)
maize_prod_Amunis_Data <- read_excel("ProductionData/maize_brazil.xlsx", sheet = "Production (tons)", skip = 1, na = c("", "-", "..."))


#maize data are by municiaplity for all states (Amunis)
soy_prod_Amunis_Data <- read_excel("ProductionData/soybean_brazil.xlsx", sheet = "Production (Tons)", skip = 1, na = c("", "-", "..."))


Fstate_vals <- c(17,    29, 31, 35, 41, 42, 43, 50, 51, 52)
Fstate_abbrev <- c("TO", "BA", "MG", "SP", "PR",  "SC", "RS", "MS", "MT", "GO")

Astate_codes <- meat_prod_Astates_Data %>%
  dplyr::select(NM_UF_SIGLA, CD_GCUF) %>%
  rename(state = NM_UF_SIGLA, stateid = CD_GCUF) %>%
  filter(!is.na(state))    #safer way to remove text line at bottom of state column
  

##MEAT
meat_prod_Astates <- meat_prod_Astates_Data %>%
  rename(state = NM_UF_SIGLA) %>%
  dplyr::select(-NM_UF, -CD_GCUF) %>%      #drop columns
  filter(!is.na(state)) %>%   #safer way to remove text line at bottom of state column
  mutate_at(vars("2001":"2018"), as.numeric) 

meat_prod_Astates_long <- meat_prod_Astates %>%
   gather(key = year, value = meat_kg, -state) %>%
   mutate_at(vars(year), as.integer) %>%
   mutate(meat_gg = meat_kg * 0.000001) %>%  #convert from kg to gg
   dplyr::select(-meat_kg)

##DAIRY
# dairy_prod_Amunis <- dairy_prod_Amunis_Data %>%
#   rename(muniID = `IBGE CODE`) %>%
#   filter(!is.na(muniID)) %>%   #safer way to remove text line in muniID
#   mutate(state = substr(muniID, 1, 2)) %>%     #extract the muniID
#   mutate_at(vars("2000":"2015"), as.numeric) %>%  #convert values to numeric
#   dplyr::select(-Municipality)   #drop unwanted columns
# 
# dairy_prod_Astates <- dairy_prod_Amunis %>%
#   group_by(state) %>%
#   mutate_all(funs(. * 1.03 * 1000)) %>%     #convert from thousand litres to kgs
#   summarise_all(sum, na.rm=T) %>%    #summarise munis to states
#   mutate(state=replace(state, 1:length(Astate_codes$stateid), Astate_codes$state)) #re-label stated ids with state abbrevs

# dairy_prod_Astates_long <- dairy_prod_Astates %>%
#   gather(key = year, value = dairy_kg, -state, -muniID) %>%
#   mutate_at(vars(year), as.integer) %>%
#   mutate(dairy_gg = dairy_kg * 0.000001) %>%  #convert from kg to gg
#   dplyr::select(-dairy_kg, -muniID)


#MAIZE
#has the same data strucutre (with some differences in unit conversions - could write function to cover both?) 
maize_prod_Amunis <- maize_prod_Amunis_Data %>%
  rename(muniID = `IBGE CODE`) %>%
  filter(!is.na(muniID)) %>%   #safer way to remove text line in muniID
  mutate(state = substr(muniID, 1, 2)) %>%     #extract the muniID
  mutate_at(vars("2001":"2018"), as.numeric) %>%  #convert values to numeric
  dplyr::select(-Municipality)   #drop unwanted columns

maize_prod_Astates <- maize_prod_Amunis %>%
  group_by(state) %>%
  summarise_all(sum, na.rm=T) %>%    #summarise munis to states
  mutate(state=replace(state, 1:length(Astate_codes$stateid), Astate_codes$state)) #re-label stated ids with state abbrevs

maize_prod_Astates_long <- maize_prod_Astates %>%
  gather(key = year, value = maize_kg, -state, -muniID) %>%
  mutate_at(vars(year), as.integer) %>%
  mutate(maize_gg = maize_kg * 0.001) %>%  #convert from tons to gg
  dplyr::select(-maize_kg, -muniID)

##SOY
soy_prod_Amunis <- soy_prod_Amunis_Data %>%
  rename(muniID = `IBGE CODE`) %>%
  filter(!is.na(muniID)) %>%   #safer way to remove text line in muniID
  mutate(state = substr(muniID, 1, 2)) %>%     #extract the muniID
  mutate_at(vars("2001":"2018"), as.numeric) %>%  #convert values to numeric
  dplyr::select(-Municipality)   #drop unwanted columns

soy_prod_Astates <- soy_prod_Amunis %>%
  group_by(state) %>%
  summarise_all(sum, na.rm=T) %>%    #summarise munis to states
  mutate(state=replace(state, 1:length(Astate_codes$stateid), Astate_codes$state)) #re-label stated ids with state abbrevs

soy_prod_Astates_long <- soy_prod_Astates %>%
  gather(key = year, value = soy_kg, -state, -muniID) %>%
  mutate_at(vars(year), as.integer) %>%
  mutate(soy_gg = soy_kg * 0.001) %>%  #convert from tons to gg
  dplyr::select(-soy_kg, -muniID)

prod_state_year <- left_join(meat_prod_Astates_long, maize_prod_Astates_long, by = c("year", "state"))

prod_state_year <- left_join(prod_state_year, soy_prod_Astates_long, by = c("year", "state"))

#add focal states indicator
prod_state_year <- prod_state_year %>%
  mutate(simulated = state %in% Fstate_abbrev) 

psy_long <- prod_state_year %>%
  gather(key = commodity, value = gg, -state, -year, -simulated)

psimy_long <- psy_long %>%
  group_by(simulated, year, commodity) %>%
  summarise(sumsim = sum(gg, na.rm=T))

prod_state_year
psimy_long_sim <- psimy_long %>% 
  filter(simulated == "TRUE") 

psimy_long_sim %>% 
  ggplot(aes(x = year, y = sumsim, fill = commodity)) + 
  geom_bar(stat = "identity", colour="white") +
  scale_y_continuous(name = "Production (gg)", labels = scales::comma) +
  #facet_grid(.~state) +
  ggtitle("Production Data")

psimy_long_sim %>% 
  ggplot(aes(x = year, y = sumsim, colour=commodity)) + 
  geom_line(size = 1) +
  scale_y_continuous(name = "Production (gg)", labels = scales::comma) +
  #facet_grid(.~state) +
  ggtitle("Production Data")

For CRAFTY we assume the following values are the maximum possible production yields (gg per 25sq km): - Soy = 20
- Maize = 30 - Milk = 2.5 - Meat = 0.275

Using these values we can check what the observed production quantities would imply for the number of land cover cells.

#THESE ARE PERFECT VALUES! Assumes service production = 1 

#first, combine maize2 with maize
psimy_long_sim  <- psimy_long_sim %>%
  spread(commodity, sumsim) %>%
  gather(key = commodity, value = sumsim, -year, -simulated)


psimy_long_sim  <- psimy_long_sim %>%
  mutate(cells = if_else(commodity == "maize_gg", sumsim / 30,
      if_else(commodity == "meat_gg", sumsim / 0.275, sumsim / 20)
      )) %>%
  mutate(commodity = if_else(commodity == "maize_gg", "maize",
      if_else(commodity == "meat_gg", "meat", "soy"
      ))
  )

psimy_long_sim %>% 
  ggplot(aes(x = year, y = cells, fill = commodity)) + 
  geom_bar(stat = "identity", colour="white") +
  scale_y_continuous(name = "Production (cells)", labels = scales::comma) +
  #facet_grid(.~state) +
  ggtitle("Production Data: cells given perfect production")

psimy_long_sim %>% 
  ggplot(aes(x = year, y = cells, colour = commodity)) + 
  geom_line(size = 1) +
  scale_y_continuous(name = "Production (cells)", labels = scales::comma) +
  #facet_grid(.~state) +
  ggtitle("Production Data: cells given perfect production")

psimy_wide_sim  <- psimy_long_sim %>%
  dplyr::select(-sumsim) %>%
  spread(commodity, cells) 

psimy_wide_sim
#write_csv(psimy_wide_sim, "TotalProduction.csv")

From the observed production data we see increasing required cells, from 20,000 to 30,000

To compare this estimated number of cells (from perfect production) with the number of cells observed in land cover maps, we first need to aggregeate the production data to ‘Agri’ to match the LC data (and set Pasture equal to meat)

psimy_wide_sim  <- psimy_long_sim %>%
  dplyr::select(-sumsim) %>%
  spread(commodity, cells) %>%
  mutate(Agri = maize + soy, Pasture = meat)

plcsim_long_sim <- psimy_wide_sim %>%
  dplyr::select(year, Agri, Pasture) %>%
  gather(key = LC, value = cells, -year, -simulated) 


plcsim_long_sim %>% 
  ggplot(aes(x = year, y = cells, fill = LC)) + 
  geom_bar(stat = "identity", colour="white") +
  scale_y_continuous(name = "Production(cells)", labels = scales::comma) +
  #facet_grid(.~state) +
  ggtitle("Production Data: cells given perfect production")

plcsim_long_sim %>% 
  ggplot(aes(x = year, y = cells, fill = LC)) + 
  geom_bar(stat = "identity", colour="white", position="fill") +
  scale_y_continuous(name = "Production(cells)", labels = scales::percent_format()) +
  #facet_grid(.~state) +
  ggtitle("Production Data: cells given perfect production")

plcsim_long_sim %>% 
  ggplot(aes(x = year, y = cells, colour = LC)) + 
  geom_line(size = 1) +
  scale_y_continuous(name = "Production (cells)", labels = scales::comma) +
  #facet_grid(.~state) +
  ggtitle("Production Data: cells given perfect production")

Now let’s see what we got in the observed land cover data

lc_cells_long_brazil %>% 
  filter(LC == "Agri" | LC == "Pasture") %>%
  ggplot(aes(x = year, y = cells, fill = LC)) + 
  geom_bar(stat = "identity", colour="white") +
  scale_y_continuous(name = "Cells", labels = scales::comma) +
  #facet_grid(.~state) +
  ggtitle("Land Cover Data Observed Cells")

lc_cells_long_brazil %>% 
  filter(LC == "Agri" | LC == "Pasture") %>%
  ggplot(aes(x = year, y = cells, fill = LC)) + 
  geom_bar(stat = "identity", colour="white", position="fill") +
  scale_y_continuous(name = "Cells", labels = scales::percent_format()) +
  #facet_grid(.~state) +
  ggtitle("Land Cover Data Observed Cells")

lc_cells_long_brazil %>% 
  filter(LC == "Agri" | LC == "Pasture") %>%
  ggplot(aes(x = year, y = cells, colour = LC)) + 
  geom_line(size = 1) +
  scale_y_continuous(name = "Cells", labels = scales::comma) +
  #facet_grid(.~state) +
  ggtitle("Land Cover Data Observed Cells")

Proportions are about right (Agri increases share through time) although observed land area in agriculture increases more consistently than according to that estimated from (perfect) production

However, in terms of absolute number of cells there are many more cells observed in the LC data than predicted by perfect production data.

Let’s look at these data in table format.

Production Data

Values are number of cells (APratio is Agri / Pasture)

psimy_wide_sim <- psimy_wide_sim %>%  
  mutate(APratio = round(Agri / Pasture,3))

psimy_wide_sim

Land Cover Data

Values are number of cells (APratio is Agri / Pasture)

lc_cells_wide_brazil <- lc_cells_long_brazil %>%
  spread(LC, cells) %>%
  mutate(APratio = round(Agri / Pasture,3))

lc_cells_wide_brazil 

Again, we see proportions are quite similar, but absolute numbers of cells are way off. This is likely because observed land was not producing ‘perfect’ yields. So, let’s see what assuming production conversion is not perfect does.

First let’s calculate what ‘production efficiency’ (i.e. proportion of ‘perfect’ production) the observed numbers of cells indicates.

joined <- left_join(plcsim_long_sim, lc_cells_long_brazil, by = c("year", "LC"))

joined <- joined %>%
  mutate(prod_cells = round(cells.x,0)) %>% 
  dplyr::select(-cells.x) %>%
  rename(lc_cells = cells.y) %>% 
  mutate(pe = prod_cells / lc_cells )

joined %>%
  ggplot(aes(x = year, y = pe, colour = LC)) + 
  geom_line(size = 1) +
  scale_y_continuous(name = "Proportion (of Perfect)", labels = scales::comma) +
  #facet_grid(.~state) +
  ggtitle("Production Efficiency")

joined

We see clear increasing trends for both Agri and Pasture.

Let’s plot the relationship between production (cells) and land cover (cells):

joined %>% ggplot(aes(x = prod_cells, y = lc_cells, colour=year, shape=LC)) + 
  geom_point() +
  geom_abline(intercept = 0, slope = 1, color = "black") +
  ggtitle("Perfect Conversion")

We see that for maximum (perfect) production, neither Agri nor Pasture correlate well.

When we plot the 40% conversion (below) we see Agri matches up a little better.

joined <- joined %>%
  mutate(prod_cells40 = prod_cells / 0.4) 

joined %>%
  ggplot(aes(x = prod_cells40, y = lc_cells, colour=year, shape=LC)) + 
  geom_point() +
  geom_abline(intercept = 0, slope = 1, color = "black") +
  ggtitle("Imperfect Conversion (40%)")

Plot Agri only. We see early years fluctuate around 0.4, with improving conversion later.

joined %>%
  filter(LC == "Agri") %>%
  ggplot(aes(x = prod_cells40, y = lc_cells, colour=year, shape=LC)) + 
  geom_point() +
  geom_abline(intercept = 0, slope = 1, color = "black") +
  ggtitle("Imperfect Conversion (40%)")

Assuming 0.4 production efficiency for Soy and Maize, what is the number of cells that would be required given the observed production?

psimy_long_sim  <- psimy_long_sim %>%
  mutate(prod04_cells = cells / 0.4)


psimy_long_sim %>% 
  filter(commodity == "maize" | commodity == "soy") %>%
  ggplot(aes(x = year, y = prod04_cells, colour = commodity)) + 
  geom_line(size = 1) +
  scale_y_continuous(name = "Cells", labels = scales::comma) +
  #facet_grid(.~state) +
  ggtitle("Production Data: cells given 0.4 production")