rm(list=ls())
library(tidyverse)
library(readxl)
##Load Production Data
meat_prod_Astates_Data <- read_excel("Cattle_meat_production_Kg_2000_2017_all_states.xlsx", sheet = "Plan1", skip = 1)  #data for all states Astates

#dairy data are by municiaplity for all states (Amunis)
dairy_prod_Amunis_Data <- read_excel("dairy_Municipalities_Brazil.xlsx", sheet = "Tabela", skip = 1, na = c("", "-", "..."))

##Tidy production data
meat_prod_Astates <- meat_prod_Astates_Data %>%
  rename(state = NM_UF_SIGLA) %>%
  select(-NM_UF, -CD_GCUF, -`2016`, -`2017`) %>%      #drop columns
  filter(!is.na(state)) %>%   #safer way to remove text line at bottom of state column
  mutate_at(vars("2000":"2015"), as.numeric) 

Fstate_vals <- c(17,    29, 31, 35, 41, 42, 43, 50, 51, 52)
Fstate_abbrev <- c("TO", "BA", "MG", "SP", "PR",  "SC", "RS", "MS", "MT", "GO")

#Filter meat focal states
meat_prod_Fstates <- meat_prod_Astates %>%
  filter(state %in% Fstate_abbrev) 

meat_prod_Fstates_long <- meat_prod_Fstates %>%
  gather(key = year, value = meat_kg, -state) %>%
  mutate_at(vars(year), as.integer) %>%
  mutate(meat_gg = meat_kg * 0.000001) %>%  #convert from kg to gg
  select(-meat_kg)


dairy_prod_Amunis <- dairy_prod_Amunis_Data %>%
  rename(muniID = `IBGE CODE`) %>%
  filter(!is.na(muniID)) %>%   #safer way to remove text line in muniID 
  mutate(state = substr(muniID, 1, 2)) %>%     #extract the muniID
  mutate_at(vars("2000":"2015"), as.numeric) %>%  #convert values to numeric
  select(-muniID, -Municipality)   #drop unwanted states

dairy_prod_Astates <- dairy_prod_Amunis %>% 
  group_by(state) %>%  
  mutate_all(funs(. * 1.03 * 1000)) %>%     #convert from thousand litres to kgs
  summarise_all(sum, na.rm=T)    #summarise munis to states

dairy_prod_Fstates<- dairy_prod_Astates %>%
  filter(state %in% Fstate_vals) %>% 
  mutate(state=replace(state, 1:length(Fstate_vals), Fstate_abbrev))

dairy_prod_Fstates_long <- dairy_prod_Fstates %>%
  gather(key = year, value = dairy_kg, -state) %>%
  mutate_at(vars(year), as.integer) %>%
  mutate(dairy_gg = dairy_kg * 0.000001) %>%  #convert from kg to gg
  select(-dairy_kg)


prod_state_year <- left_join(meat_prod_Fstates_long, dairy_prod_Fstates_long, by = c("year", "state"))

psy_long <- prod_state_year %>%
  gather(key = commodity, value = gg, -state, - year)

Plots

##Plot data

ggplot(psy_long, aes(x = year, y = gg, fill = commodity)) + 
  geom_bar(position = "fill",stat = "identity", colour="white") +
  scale_y_continuous(name = "Proportion of Total", labels = scales::percent_format()) +
  facet_grid(.~state) +
  ggtitle("Meat vs Dairy")

##Proportions
prop_sy <- prod_state_year %>%
  mutate(prop = dairy_gg / (dairy_gg + meat_gg))

prop_s <- prop_sy %>%
  group_by(state) %>%
  summarise(
    prop_mn = round(mean(prop),3),
    prop_md = round(median(prop),3),
    prop_max = round(max(prop),3),
    prop_sd = round(sd(prop),3),
    prop_se = round(sd(prop) / length(prop), 3)
  )

ggplot(prop_s, aes(x = state, y = prop_mn)) + 
  geom_bar(stat="identity", colour="white") +
  geom_errorbar(aes(ymin=(prop_mn-(prop_se*1.96)), ymax=(prop_mn+(prop_se*1.96))), width=0.25) +
  scale_y_continuous(name="Proportion", labels = scales::comma) +
  ggtitle("Mean Proportion (Dairy of Total Production)")

ggplot(prop_s, aes(x = state, y = prop_md)) + 
  geom_bar(stat="identity", colour="white") +
  scale_y_continuous(name="Proportion", labels = scales::comma) +
  ggtitle("Median Proportion (Dairy of Total Production)")

ggplot(prop_s, aes(x = state, y = prop_max)) + 
  geom_bar(stat="identity", colour="white") +
  scale_y_continuous(name="Proportion", labels = scales::comma) +
  ggtitle("Maximum Proportion (Dairy of Total Production)")

prop_s