rm(list=ls())
library(tidyverse)
library(readxl)
Using Summary Table data for observed Land Cover (PastureB classification)
yrs <- seq(2001, 2018, 1)
#load the region file (used to match each cell to a municipality)
region <- read.csv("SummaryTables/region2001_noDC_HD_2019-10-28.csv")
#for debugging
#i <- 1
#loop through all years
for(i in seq_along(yrs)){
#if this is the first year, create lc_dat
if(i == 1){
lc_dat <- read_csv(paste0("SummaryTables/SummaryTable",yrs[i],"_PastureB_Disagg.csv")) #load empirical map summary data (created using summarise_LCmaps.r)
lc_dat <- lc_dat %>%
mutate(year = yrs[i])
}
#if this is not the first year, bind new year data to lc_dat
else {
lc <- read_csv(paste0("SummaryTables/SummaryTable",yrs[i],"_PastureB_Disagg.csv")) #load empirical map summary data (created using summarise_LCmaps.r)
lc <- lc %>%
mutate(year = yrs[i])
lc_dat <- bind_rows(lc_dat, lc)
}
}
#add state ID
lc_dat <- lc_dat %>%
mutate(state = (muniID %/% 100000)) %>%
mutate(state = if_else(state == 17, "TO",
if_else(state == 29, "BA",
if_else(state == 31, "MG",
if_else(state == 35, "SP",
if_else(state == 41, "PR",
if_else(state == 42, "SC",
if_else(state == 43, "RS",
if_else(state == 50, "MS",
if_else(state == 51, "MT",
if_else(state == 52, "GO", "NA"
))))))))))
)
#LC1 = Nature
#LC2 = Other Agri
#LC3 = Agri
#LC4 = Other
#LC5 = Pasture
#add observed cell count columns
lc_dat <- lc_dat %>%
mutate(Nature = round(LC1 * NonNAs,0)) %>%
mutate(OAgri = round(LC2 * NonNAs,0)) %>%
mutate(Agri = round(LC3 * NonNAs,0)) %>%
mutate(Other = round(LC4 * NonNAs,0)) %>%
mutate(Pasture = round(LC5 * NonNAs,0))
#make data long
lc_cells_long <- lc_dat %>%
select(year:Pasture) %>%
gather(key = LC, value = cells, -year, -state)
write_csv(lc_cells_long, "lc_cells_long.csv")
#calculate state totals
lc_cells_long_states <- lc_cells_long %>%
group_by(state, year,LC) %>%
summarise_at(vars(matches("cells")),sum, na.rm=T)
write_csv(lc_cells_long_states, "lc_cells_long_states2.csv")
Plot observed land cover by state through time
lc_cells_long_states %>%
ggplot(aes(x = year, y = cells, fill = LC)) +
geom_bar(stat = "identity") +
scale_y_continuous(name = "Cells", labels = scales::comma) +
facet_grid(.~state) +
ggtitle("Ten States Observed Cells") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
Plot observed land cover for all 10 simulation states through time
lc_cells_long_brazil <- lc_cells_long_states %>%
group_by(year,LC) %>%
summarise_at(vars(matches("cells")),sum)
#lc_cells_long_brazil %>%
# spread(key=LC, value=cells) %>%
# write_csv("SummaryTables_cells.csv")
lc_cells_long_brazil %>%
ggplot(aes(x = year, y = cells, fill = LC)) +
geom_bar(stat = "identity", colour="white") +
scale_y_continuous(name = "Cells", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("Ten States Observed Cells")
lc_cells_long_brazil %>%
ggplot(aes(x = year, y = cells, colour = LC)) +
geom_line(size = 1) +
scale_y_continuous(name = "Cells", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("Ten States Observed Cells")
Use production data from IBGE
##Load Production Data
meat_prod_Astates_Data <- read_excel("ProductionData/Cattle_meat_production_Kg_2000_2018_all_states.xlsx", sheet = "Plan1", skip = 1) #data for all states Astates
#dairy data are by municiaplity for all states (Amunis)
#dairy_prod_Amunis_Data <- read_excel("ProductionData/dairy_Municipalities_Brazil.xlsx", sheet = "Tabela", skip = 1, na = c("", "-", "..."))
#maize data are by municiaplity for all states (Amunis)
maize_prod_Amunis_Data <- read_excel("ProductionData/maize_brazil.xlsx", sheet = "Production (tons)", skip = 1, na = c("", "-", "..."))
#maize data are by municiaplity for all states (Amunis)
soy_prod_Amunis_Data <- read_excel("ProductionData/soybean_brazil.xlsx", sheet = "Production (Tons)", skip = 1, na = c("", "-", "..."))
Fstate_vals <- c(17, 29, 31, 35, 41, 42, 43, 50, 51, 52)
Fstate_abbrev <- c("TO", "BA", "MG", "SP", "PR", "SC", "RS", "MS", "MT", "GO")
Astate_codes <- meat_prod_Astates_Data %>%
dplyr::select(NM_UF_SIGLA, CD_GCUF) %>%
rename(state = NM_UF_SIGLA, stateid = CD_GCUF) %>%
filter(!is.na(state)) #safer way to remove text line at bottom of state column
##MEAT
meat_prod_Astates <- meat_prod_Astates_Data %>%
rename(state = NM_UF_SIGLA) %>%
dplyr::select(-NM_UF, -CD_GCUF) %>% #drop columns
filter(!is.na(state)) %>% #safer way to remove text line at bottom of state column
mutate_at(vars("2001":"2018"), as.numeric)
meat_prod_Astates_long <- meat_prod_Astates %>%
gather(key = year, value = meat_kg, -state) %>%
mutate_at(vars(year), as.integer) %>%
mutate(meat_gg = meat_kg * 0.000001) %>% #convert from kg to gg
dplyr::select(-meat_kg)
##DAIRY
# dairy_prod_Amunis <- dairy_prod_Amunis_Data %>%
# rename(muniID = `IBGE CODE`) %>%
# filter(!is.na(muniID)) %>% #safer way to remove text line in muniID
# mutate(state = substr(muniID, 1, 2)) %>% #extract the muniID
# mutate_at(vars("2000":"2015"), as.numeric) %>% #convert values to numeric
# dplyr::select(-Municipality) #drop unwanted columns
#
# dairy_prod_Astates <- dairy_prod_Amunis %>%
# group_by(state) %>%
# mutate_all(funs(. * 1.03 * 1000)) %>% #convert from thousand litres to kgs
# summarise_all(sum, na.rm=T) %>% #summarise munis to states
# mutate(state=replace(state, 1:length(Astate_codes$stateid), Astate_codes$state)) #re-label stated ids with state abbrevs
# dairy_prod_Astates_long <- dairy_prod_Astates %>%
# gather(key = year, value = dairy_kg, -state, -muniID) %>%
# mutate_at(vars(year), as.integer) %>%
# mutate(dairy_gg = dairy_kg * 0.000001) %>% #convert from kg to gg
# dplyr::select(-dairy_kg, -muniID)
#MAIZE
#has the same data strucutre (with some differences in unit conversions - could write function to cover both?)
maize_prod_Amunis <- maize_prod_Amunis_Data %>%
rename(muniID = `IBGE CODE`) %>%
filter(!is.na(muniID)) %>% #safer way to remove text line in muniID
mutate(state = substr(muniID, 1, 2)) %>% #extract the muniID
mutate_at(vars("2001":"2018"), as.numeric) %>% #convert values to numeric
dplyr::select(-Municipality) #drop unwanted columns
maize_prod_Astates <- maize_prod_Amunis %>%
group_by(state) %>%
summarise_all(sum, na.rm=T) %>% #summarise munis to states
mutate(state=replace(state, 1:length(Astate_codes$stateid), Astate_codes$state)) #re-label stated ids with state abbrevs
maize_prod_Astates_long <- maize_prod_Astates %>%
gather(key = year, value = maize_kg, -state, -muniID) %>%
mutate_at(vars(year), as.integer) %>%
mutate(maize_gg = maize_kg * 0.001) %>% #convert from tons to gg
dplyr::select(-maize_kg, -muniID)
##SOY
soy_prod_Amunis <- soy_prod_Amunis_Data %>%
rename(muniID = `IBGE CODE`) %>%
filter(!is.na(muniID)) %>% #safer way to remove text line in muniID
mutate(state = substr(muniID, 1, 2)) %>% #extract the muniID
mutate_at(vars("2001":"2018"), as.numeric) %>% #convert values to numeric
dplyr::select(-Municipality) #drop unwanted columns
soy_prod_Astates <- soy_prod_Amunis %>%
group_by(state) %>%
summarise_all(sum, na.rm=T) %>% #summarise munis to states
mutate(state=replace(state, 1:length(Astate_codes$stateid), Astate_codes$state)) #re-label stated ids with state abbrevs
soy_prod_Astates_long <- soy_prod_Astates %>%
gather(key = year, value = soy_kg, -state, -muniID) %>%
mutate_at(vars(year), as.integer) %>%
mutate(soy_gg = soy_kg * 0.001) %>% #convert from tons to gg
dplyr::select(-soy_kg, -muniID)
prod_state_year <- left_join(meat_prod_Astates_long, maize_prod_Astates_long, by = c("year", "state"))
prod_state_year <- left_join(prod_state_year, soy_prod_Astates_long, by = c("year", "state"))
#add focal states indicator
prod_state_year <- prod_state_year %>%
mutate(simulated = state %in% Fstate_abbrev)
psy_long <- prod_state_year %>%
gather(key = commodity, value = gg, -state, -year, -simulated)
psimy_long <- psy_long %>%
group_by(simulated, year, commodity) %>%
summarise(sumsim = sum(gg, na.rm=T))
prod_state_year
psimy_long_sim <- psimy_long %>%
filter(simulated == "TRUE")
psimy_long_sim %>%
ggplot(aes(x = year, y = sumsim, fill = commodity)) +
geom_bar(stat = "identity", colour="white") +
scale_y_continuous(name = "Production (gg)", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("Production Data")
psimy_long_sim %>%
ggplot(aes(x = year, y = sumsim, colour=commodity)) +
geom_line(size = 1) +
scale_y_continuous(name = "Production (gg)", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("Production Data")
For CRAFTY we assume the following values are the maximum possible production yields (gg per 25sq km): - Soy = 20
- Maize = 30 - Milk = 2.5 - Meat = 0.275
Using these values we can check what the observed production quantities would imply for the number of land cover cells.
#THESE ARE PERFECT VALUES! Assumes service production = 1
#first, combine maize2 with maize
psimy_long_sim <- psimy_long_sim %>%
spread(commodity, sumsim) %>%
gather(key = commodity, value = sumsim, -year, -simulated)
psimy_long_sim <- psimy_long_sim %>%
mutate(cells = if_else(commodity == "maize_gg", sumsim / 30,
if_else(commodity == "meat_gg", sumsim / 0.275, sumsim / 20)
)) %>%
mutate(commodity = if_else(commodity == "maize_gg", "maize",
if_else(commodity == "meat_gg", "meat", "soy"
))
)
psimy_long_sim %>%
ggplot(aes(x = year, y = cells, fill = commodity)) +
geom_bar(stat = "identity", colour="white") +
scale_y_continuous(name = "Production (cells)", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("Production Data: cells given perfect production")
psimy_long_sim %>%
ggplot(aes(x = year, y = cells, colour = commodity)) +
geom_line(size = 1) +
scale_y_continuous(name = "Production (cells)", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("Production Data: cells given perfect production")
psimy_wide_sim <- psimy_long_sim %>%
dplyr::select(-sumsim) %>%
spread(commodity, cells)
psimy_wide_sim
#write_csv(psimy_wide_sim, "TotalProduction.csv")
From the observed production data we see increasing required cells, from 20,000 to 30,000
To compare this estimated number of cells (from perfect production) with the number of cells observed in land cover maps, we first need to aggregeate the production data to ‘Agri’ to match the LC data (and set Pasture equal to meat)
psimy_wide_sim <- psimy_long_sim %>%
dplyr::select(-sumsim) %>%
spread(commodity, cells) %>%
mutate(Agri = maize + soy, Pasture = meat)
plcsim_long_sim <- psimy_wide_sim %>%
dplyr::select(year, Agri, Pasture) %>%
gather(key = LC, value = cells, -year, -simulated)
plcsim_long_sim %>%
ggplot(aes(x = year, y = cells, fill = LC)) +
geom_bar(stat = "identity", colour="white") +
scale_y_continuous(name = "Production(cells)", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("Production Data: cells given perfect production")
plcsim_long_sim %>%
ggplot(aes(x = year, y = cells, fill = LC)) +
geom_bar(stat = "identity", colour="white", position="fill") +
scale_y_continuous(name = "Production(cells)", labels = scales::percent_format()) +
#facet_grid(.~state) +
ggtitle("Production Data: cells given perfect production")
plcsim_long_sim %>%
ggplot(aes(x = year, y = cells, colour = LC)) +
geom_line(size = 1) +
scale_y_continuous(name = "Production (cells)", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("Production Data: cells given perfect production")
Now let’s see what we got in the observed land cover data
lc_cells_long_brazil %>%
filter(LC == "Agri" | LC == "Pasture") %>%
ggplot(aes(x = year, y = cells, fill = LC)) +
geom_bar(stat = "identity", colour="white") +
scale_y_continuous(name = "Cells", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("Land Cover Data Observed Cells")
lc_cells_long_brazil %>%
filter(LC == "Agri" | LC == "Pasture") %>%
ggplot(aes(x = year, y = cells, fill = LC)) +
geom_bar(stat = "identity", colour="white", position="fill") +
scale_y_continuous(name = "Cells", labels = scales::percent_format()) +
#facet_grid(.~state) +
ggtitle("Land Cover Data Observed Cells")
lc_cells_long_brazil %>%
filter(LC == "Agri" | LC == "Pasture") %>%
ggplot(aes(x = year, y = cells, colour = LC)) +
geom_line(size = 1) +
scale_y_continuous(name = "Cells", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("Land Cover Data Observed Cells")
Proportions are about right (Agri increases share through time) although observed land area in agriculture increases more consistently than according to that estimated from (perfect) production
However, in terms of absolute number of cells there are many more cells observed in the LC data than predicted by perfect production data.
Let’s look at these data in table format.
Values are number of cells (APratio
is Agri / Pasture
)
psimy_wide_sim <- psimy_wide_sim %>%
mutate(APratio = round(Agri / Pasture,3))
psimy_wide_sim
Values are number of cells (APratio
is Agri / Pasture
)
lc_cells_wide_brazil <- lc_cells_long_brazil %>%
spread(LC, cells) %>%
mutate(APratio = round(Agri / Pasture,3))
lc_cells_wide_brazil
Again, we see proportions are quite similar, but absolute numbers of cells are way off. This is likely because observed land was not producing ‘perfect’ yields. So, let’s see what assuming production conversion is not perfect does.
First let’s calculate what ‘production efficiency’ (i.e. proportion of ‘perfect’ production) the observed numbers of cells indicates.
joined <- left_join(plcsim_long_sim, lc_cells_long_brazil, by = c("year", "LC"))
joined <- joined %>%
mutate(prod_cells = round(cells.x,0)) %>%
dplyr::select(-cells.x) %>%
rename(lc_cells = cells.y) %>%
mutate(pe = prod_cells / lc_cells )
joined %>%
ggplot(aes(x = year, y = pe, colour = LC)) +
geom_line(size = 1) +
scale_y_continuous(name = "Proportion (of Perfect)", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("Production Efficiency")
joined
We see clear increasing trends for both Agri and Pasture.
Let’s plot the relationship between production (cells) and land cover (cells):
joined %>% ggplot(aes(x = prod_cells, y = lc_cells, colour=year, shape=LC)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "black") +
ggtitle("Perfect Conversion")
We see that for maximum (perfect) production, neither Agri nor Pasture correlate well.
When we plot the 40% conversion (below) we see Agri matches up a little better.
joined <- joined %>%
mutate(prod_cells40 = prod_cells / 0.4)
joined %>%
ggplot(aes(x = prod_cells40, y = lc_cells, colour=year, shape=LC)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "black") +
ggtitle("Imperfect Conversion (40%)")
Plot Agri only. We see early years fluctuate around 0.4, with improving conversion later.
joined %>%
filter(LC == "Agri") %>%
ggplot(aes(x = prod_cells40, y = lc_cells, colour=year, shape=LC)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "black") +
ggtitle("Imperfect Conversion (40%)")
Assuming 0.4 production efficiency for Soy and Maize, what is the number of cells that would be required given the observed production?
psimy_long_sim <- psimy_long_sim %>%
mutate(prod04_cells = cells / 0.4)
psimy_long_sim %>%
filter(commodity == "maize" | commodity == "soy") %>%
ggplot(aes(x = year, y = prod04_cells, colour = commodity)) +
geom_line(size = 1) +
scale_y_continuous(name = "Cells", labels = scales::comma) +
#facet_grid(.~state) +
ggtitle("Production Data: cells given 0.4 production")