### Paquetes ----
pacman::p_load(tidyverse, scales, ggtext, ggrepel, gridExtra)

## Setup
# Use the Spanish locale name that matches the host OS. The two names are
# platform specific, so calling both unconditionally always made one of them
# fail with a warning.
if (.Platform$OS.type == "windows") {
  Sys.setlocale("LC_ALL", "Spanish_Mexico.1252")
} else {
  Sys.setlocale("LC_ALL", "es_ES.UTF-8")
}

# Strongly penalise scientific notation when numbers are printed.
options(scipen = 999)

# NOTE(review): the former `rm(list = ls())` was removed. It wiped every
# object in the caller's workspace, and nothing visible in this script relies
# on starting from an empty environment. Restart R for a clean session.

### Font ----
# Register "Roboto Condensed" under the family name "A", which the plots below
# request via `family = "A"`. `windowsFonts()` only exists on Windows builds
# of R, so the call is guarded to keep the script from aborting elsewhere.
# NOTE(review): on other platforms family "A" stays unregistered; register it
# with the font mechanism of the graphics device in use - TODO confirm.
if (.Platform$OS.type == "windows") {
  windowsFonts(A = windowsFont("Roboto Condensed"))
}

### Data----

# Shorten the long official state names to the short names used in the plots.
# Any value not listed (including NA) is returned unchanged. Shared by the
# 2021 and the 2017/2019 data so both use identical state names.
shorten_state <- function(estado) {
  case_when(
    estado == "Coahuila de Zaragoza" ~ "Coahuila",
    estado == "Michoacán de Ocampo" ~ "Michoacán",
    estado == "Veracruz de Ignacio de la Llave" ~ "Veracruz",
    estado == "México" ~ "Estado de México",
    TRUE ~ estado
  )
}

# 2021 data
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact location exists.
# Rows without a state are dropped directly, instead of first tagging them
# with a sentinel "Error" value and filtering on that.
db <- read_csv("C://Users/pablo/RStudio/2022/metrica/1_data/Resultados Métrica 2021_Sujeto_Obligado-4.csv") %>%
  filter(!is.na(estado)) %>%
  mutate(estado = shorten_state(estado))

# Lookup from the composite id (state, type and `so`) to the row number of
# `db`. `row_number()` is used instead of `1:nrow(db)`, which yields c(1, 0)
# when `db` has zero rows.
db_join <- db %>%
  mutate(
    id = paste(estado, tipo, so, sep = "_"),
    num = row_number()
  ) %>%
  select(id, num)


# Data from previous years (2017 and 2019 in one file, columns suffixed
# with 17 / 19)
db1 <- haven::read_dta("C://Users/pablo/RStudio/2022/metrica/1_data/ResultadosMétrica2017y2019.dta") %>%
  mutate(estado = shorten_state(estado))

# 2019: rename the "...19" columns to the names used in the 2021 data and add
# the same composite id.
db19 <- db1 %>%
  rename(
    ga_indice = ga_indice19,
    p_indice = p_subindice19,
    pg_indice = pg_subindice19,
    pg_mec = pg_mec19,
    pg_formato = pg_formato19,
    pg_func = pg_func19,
    pg_seg = pg_seg19,
    pg_actores = pg_actores19,
    pc_indice = pc_subindice19,
    pc_metodos = pc_metodos19,
    pc_respuesta = pc_respuesta19,
    pc_activacion = pc_activacion19,
    pc_celeridad = pc_celeridad19,
    t_indice = t_subindice19,
    tg_indice = tg_subindice19,
    tgai_indice = tgai_componente19,
    tgda_indice = tgda_componente19,
    tgta_indice = tgta_componente19,
    tc_indice = tc_subindice19,
    tcai_indice = tcai_componente19,
    tctp_indice = tctp_componente19
  ) %>%
  mutate(id = paste(estado, tipo, so, sep = "_"))

# 2017: only the index and sub-index columns are renamed. The component-level
# renames (tgai, tgda, tgta, tcai, tctp) were commented out in the original
# script and are not applied here.
db17 <- db1 %>%
  rename(
    ga_indice = ga_indice17,
    p_indice = p_subindice17,
    pg_indice = pg_subindice17,
    pc_indice = pc_subindice17,
    t_indice = t_subindice17,
    tg_indice = tg_subindice17,
    tc_indice = tc_subindice17
  ) %>%
  mutate(id = paste(estado, tipo, so, sep = "_"))


### G1----
# G1: state-level scatter plots of the 2021 index against its change since
# 2017 (left panel) and since 2019 (right panel).

# 2021 overall index (`ga_indice`), tagged with its year.
db21 <- db %>%
  select(estado, so, tipo, ga_indice) %>%
  mutate(year = 2021)

# Mean of the 2021 index: position of the vertical reference line.
p_21 <- mean(db21$ga_indice, na.rm = TRUE)
# Two-decimal label for that line. The previous version chopped the last
# character off the 3-significant-digit string, which truncated instead of
# rounding and mangled values with trailing zeros (0.5 became 0).
p_21_ <- round(p_21, 2)

db_19 <- db19 %>%
  select(estado:tipo, ga_indice) %>%
  mutate(year = 2019)

db_17 <- db17 %>%
  select(estado:tipo, ga_indice) %>%
  mutate(year = 2017)

# Short labels for the points; applied with str_replace_all(), so a name that
# is not listed keeps its original text.
state_abbr <- c(
  "Aguascalientes" = "AGS",
  "Baja California Sur" = "BCS",
  "Baja California" = "BC",
  "Campeche" = "CAM",
  "Chiapas" = "CHP",
  "Chihuahua" = "CHH",
  "Ciudad de México" = "CDMX",
  "Coahuila" = "COA",
  "Colima" = "COL",
  "Durango" = "DUR",
  "Guanajuato" = "GUA",
  "Guerrero" = "GUE",
  "Hidalgo" = "HID",
  "Jalisco" = "JAL",
  "Federal" = "FED",
  "Estado de México" = "MEX",
  "Michoacán" = "MIC",
  "Morelos" = "MOR",
  "Nayarit" = "NAY",
  "Nuevo León" = "NL",
  "Oaxaca" = "OAX",
  "Puebla" = "PUE",
  "Quintana Roo" = "ROO",
  "Querétaro" = "QUE",
  "San Luis Potosí" = "SLP",
  "Sinaloa" = "SIN",
  "Sonora" = "SON",
  "Tabasco" = "TAB",
  "Tamaulipas" = "TAM",
  "Tlaxcala" = "TLA",
  "Veracruz" = "VER",
  "Yucatán" = "YUC",
  "Zacatecas" = "ZAC"
)

# One row per state with the mean index of each year in columns
# x2017 / x2019 / x2021, plus the short label. Built once and shared by both
# panels.
g1_means <- db21 %>%
  rbind(db_19, db_17) %>%
  group_by(estado, year) %>%
  summarise(mean = mean(ga_indice, na.rm = TRUE)) %>%
  ungroup() %>%
  pivot_wider(names_from = "year", values_from = "mean") %>%
  janitor::clean_names() %>%
  mutate(short = str_replace_all(estado, state_abbr))

# Build one G1 panel.
#   means     : wide table with columns x2021, x<base_year> and short
#   base_year : year subtracted from 2021 (2017 or 2019)
#   label_y   : y position of the text showing the 2021 mean
#   y_breaks  : breaks of the y axis
#   avg       : 2021 mean (vertical dashed line)
#   avg_label : value printed next to that line
# The difference is stored in its own column instead of overwriting `x2019`,
# which was misleading when the base year was 2017.
plot_g1_panel <- function(means, base_year, label_y, y_breaks,
                          avg, avg_label) {
  means %>%
    mutate(diferencia = x2021 - .data[[paste0("x", base_year)]]) %>%
    ggplot(aes(y = diferencia, x = x2021)) +
    geom_point(alpha = 0.3, col = "grey0") +
    geom_vline(xintercept = avg,
               size = 1,
               linetype = 2,
               alpha = 0.6) +
    geom_hline(yintercept = 0,
               size = 1,
               linetype = 2,
               alpha = 0.6,
               col = "grey0") +
    annotate("text",
             x = avg,
             y = label_y,
             label = avg_label,
             size = 3.5,
             hjust = -.15,
             fontface = "bold",
             alpha = .8, family = "A") +
    geom_text_repel(aes(label = short),
                    fontface = "bold", size = 2.8,
                    vjust = .5,
                    hjust = -.5,
                    col = "grey0",
                    segment.color = "grey40",
                    segment.alpha = 0.4,
                    show.legend = FALSE, family = "A") +
    scale_y_continuous(breaks = y_breaks) +
    scale_x_continuous(breaks = seq(0, 1, .1)) +
    # The original passed `y` twice (a title and NULL); labs() keeps the first
    # one, so only the title is kept here. The unused `fill` label is dropped.
    labs(x = "Índice obtenido en 2021",
         y = paste("Diferencia entre 2021 y", base_year)) +
    theme_light() +
    theme(legend.position = "right",
          legend.text = element_text(size = 12),
          axis.title = element_text(size = 10, face = "bold"),
          axis.title.y = element_text(color = "grey0"),
          plot.title = element_text(size = 18, face = "bold"),
          plot.caption = element_markdown(hjust = 0),
          text = element_text(family = "A"))
}

p1 <- plot_g1_panel(g1_means, 2019,
                    label_y = .15,
                    y_breaks = seq(-.2, .2, .05),
                    avg = p_21, avg_label = p_21_)

p2 <- plot_g1_panel(g1_means, 2017,
                    label_y = .25,
                    y_breaks = seq(-.4, .6, .1),
                    avg = p_21, avg_label = p_21_)

# 2017 comparison on the left, 2019 comparison on the right.
a <- grid.arrange(p2, p1, nrow = 1)

ggsave(filename = "3_viz/g1.png",
       plot = a,
       width = 10,
       height = 6,
       dpi = 900)


### G2----
# G2: same comparison as G1 but with one point per id (state_type_so) instead
# of one point per state.

db21 <- db %>%
  mutate(id = paste(estado, tipo, so, sep = "_")) %>%
  select(id, estado, so, tipo, ga_indice) %>%
  mutate(year = 2021)

# Mean of the 2021 index: position of the vertical reference line.
p_21 <- mean(db21$ga_indice, na.rm = TRUE)
# Two-decimal label for that line. The previous version chopped the last
# character off the 3-significant-digit string, which truncated instead of
# rounding and mangled values with trailing zeros (0.5 became 0).
p_21_ <- round(p_21, 2)

db_19 <- db19 %>%
  select(id, estado:tipo, ga_indice) %>%
  mutate(year = 2019)

db_17 <- db17 %>%
  select(id, estado:tipo, ga_indice) %>%
  mutate(year = 2017)

# One row per id with the index of each year in columns x2017 / x2019 / x2021.
# Built once and shared by both panels.
# NOTE(review): assumes `id` is unique within each year; duplicated ids would
# make pivot_wider() return list-columns - TODO confirm.
g2_wide <- db21 %>%
  rbind(db_19, db_17) %>%
  select(id, ga_indice, year) %>%
  pivot_wider(names_from = "year", values_from = "ga_indice") %>%
  janitor::clean_names()

# Build one G2 panel.
#   wide      : table with columns x2021 and x<base_year>
#   base_year : year subtracted from 2021 (2017 or 2019)
#   label_y   : y position of the text showing the 2021 mean
#   avg       : 2021 mean (vertical dashed line)
#   avg_label : value printed next to that line
# The difference is stored in its own column instead of overwriting `x2019`,
# which was misleading when the base year was 2017.
plot_g2_panel <- function(wide, base_year, label_y, avg, avg_label) {
  wide %>%
    mutate(diferencia = x2021 - .data[[paste0("x", base_year)]]) %>%
    ggplot(aes(y = diferencia, x = x2021)) +
    geom_point(alpha = 0.3, col = "grey0") +
    geom_vline(xintercept = avg,
               size = 1,
               linetype = 2,
               alpha = 0.6) +
    geom_hline(yintercept = 0,
               size = 1,
               linetype = 2,
               alpha = 0.6,
               col = "grey0") +
    annotate("text",
             x = avg,
             y = label_y,
             label = avg_label,
             size = 3.5,
             hjust = -.15,
             fontface = "bold",
             alpha = .8, family = "A") +
    scale_y_continuous(breaks = seq(-.4, .8, .1)) +
    scale_x_continuous(breaks = seq(0, 1, .2)) +
    # The original passed `y` twice (a title and NULL); labs() keeps the first
    # one, so only the title is kept here. The unused `fill` label is dropped.
    labs(x = "Índice obtenido en 2021",
         y = paste("Diferencia entre 2021 y", base_year)) +
    theme_light() +
    theme(legend.position = "right",
          legend.text = element_text(size = 12),
          axis.title = element_text(size = 10, face = "bold"),
          axis.title.y = element_text(color = "grey0"),
          plot.title = element_text(size = 18, face = "bold"),
          plot.caption = element_markdown(hjust = 0),
          text = element_text(family = "A"))
}

p1 <- plot_g2_panel(g2_wide, 2019, label_y = .6,
                    avg = p_21, avg_label = p_21_)

p2 <- plot_g2_panel(g2_wide, 2017, label_y = .8,
                    avg = p_21, avg_label = p_21_)

# 2017 comparison on the left, 2019 comparison on the right.
a <- grid.arrange(p2, p1, nrow = 1)

ggsave(filename = "3_viz/g2.png",
       plot = a,
       width = 10,
       height = 6,
       dpi = 900)


### G3----
# G3: for every id, the change 2017 -> 2019 and 2019 -> 2021 drawn as a line,
# faceted by the combination of advance / setback in the two periods.

db21 <- db %>%
  mutate(id = paste(estado, tipo, so, sep = "_")) %>%
  select(id, estado, so, tipo, ga_indice) %>%
  mutate(year = 2021)

db_19 <- db19 %>%
  select(id, estado:tipo, ga_indice) %>%
  mutate(year = 2019)

db_17 <- db17 %>%
  select(id, estado:tipo, ga_indice) %>%
  mutate(year = 2017)

db21 %>%
  rbind(db_19, db_17) %>%
  select(id, year, ga_indice) %>%
  pivot_wider(names_from = "year",
              values_from = "ga_indice") %>%
  janitor::clean_names() %>%
  # x1: change from 2019 to 2021; x2: change from 2017 to 2019.
  mutate(x1 = x2021 - x2019,
         x2 = x2019 - x2017) %>%
  select(id, x1, x2) %>%
  # Ids with a missing difference match none of the four conditions, fall
  # through to "Otro" and are removed by the filter below.
  mutate(color = case_when(
    x1 >= 0 & x2 >= 0 ~ "Avance en ambos años",
    x1 >= 0 & x2 < 0 ~ "Retroceso en 2019; avance en 2021",
    x1 < 0 & x2 >= 0 ~ "Avance en 2019; retroceso en 2021",
    x1 < 0 & x2 < 0 ~ "Retroceso en ambos años",
    TRUE ~ "Otro"
  )) %>%
  filter(color != "Otro") %>%
  # To tabulate the share of each category instead of plotting:
  # count(color) %>% mutate(sum = sum(n)) %>% mutate(porc = n/sum)
  pivot_longer(x1:x2) %>%
  # The former `filter(is.numeric(value))` was removed: is.numeric() returns a
  # single TRUE for the whole column, so it never dropped a row.
  mutate(name = str_replace_all(name, c("x1" = "2021",
                                        "x2" = "2019")),
         # `ordered` is spelled out; `order = T` only worked through partial
         # argument matching.
         color = factor(color,
                        ordered = TRUE,
                        levels = c("Retroceso en ambos años",
                                   "Avance en 2019; retroceso en 2021",
                                   "Retroceso en 2019; avance en 2021",
                                   "Avance en ambos años"))) %>%
  ggplot(aes(y = value,
             x = name,
             group = id)) +
  geom_hline(yintercept = 0,
             size = 1,
             linetype = 2,
             alpha = 0.6,
             col = "grey0") +
  facet_wrap(~color) +
  geom_line(alpha = 0.05, col = "grey0") +
  # Labels are keyed by the value they describe instead of by position.
  scale_x_discrete(labels = c("2019" = "2019-2017",
                              "2021" = "2021-2019")) +
  labs(x = "Años comparados",
       y = "Diferencia con respecto\nal año anterior\n") +
  theme_light() +
  theme(legend.position = "right",
        legend.text = element_text(size = 12),
        axis.title = element_text(size = 10, face = "bold"),
        axis.title.y = element_text(color = "grey0"),
        plot.title = element_text(size = 18, face = "bold"),
        plot.caption = element_markdown(hjust = 0),
        text = element_text(family = "A"),
        strip.text.x = element_text(size = 12, face = "bold",
                                    color = "grey0"))

# No `plot` argument: ggsave() saves the last plot created, i.e. the one above.
ggsave("3_viz/g3.png",
       width = 6,
       height = 5,
       dpi = 900)


# G4----
# G4: distribution of the overall index in 2017, 2019 and 2021, drawn as
# overlaid box plot, violin and jittered points (one horizontal band per year).

db21 <- mutate(select(db, estado, so, tipo, ga_indice), year = 2021)

# Previous years are taken straight from the combined 2017/2019 table; the
# year-suffixed index column is renamed while it is selected.
db_19 <- db1 %>%
  select(estado:tipo, ga_indice = ga_indice19) %>%
  mutate(year = 2019)

db_17 <- db1 %>%
  select(estado:tipo, ga_indice = ga_indice17) %>%
  mutate(year = 2017)

# Stack the three years and keep only what the plot needs.
g4_data <- rbind(db21, db_19, db_17) %>%
  transmute(estado, year, mean = ga_indice)

ggplot(g4_data, aes(x = mean, y = year, group = year)) +
  geom_boxplot() +
  geom_violin(fill = "transparent") +
  geom_jitter(height = .2, alpha = .2) +
  scale_x_continuous(breaks = seq(0, 1, .2)) +
  scale_y_continuous(breaks = c(2017, 2019, 2021)) +
  labs(x = "Valor de cada entidad",
       y = "Año\n") +
  theme_light() +
  theme(panel.grid.minor.y = element_blank(),
        legend.position = "right",
        legend.text = element_text(size = 12),
        axis.title = element_text(size = 10, face = "bold"),
        axis.title.y = element_text(color = "grey0"),
        plot.title = element_text(size = 18, face = "bold"),
        plot.caption = element_markdown(hjust = 0),
        text = element_text(family = "A"))

# No `plot` argument: ggsave() saves the last plot created, i.e. the one above.
ggsave("3_viz/g4.png",
       width = 6,
       height = 5,
       dpi = 900)


### G5----
# G5: distribution of the 2021 transparency (`t_indice`) and participation
# (`p_indice`) values.

# Build the table BEFORE summarising it. The quantile() calls used to run
# first, against the `db21` left over from G4, which has neither `t_indice`
# nor `p_indice`.
db21 <- db %>%
  select(estado, so, tipo, t_indice, p_indice) %>%
  mutate(year = 2021)

# Quartiles of each dimension (printed to the console).
quantile(db21$t_indice, c(.25, .5, .75), na.rm = TRUE)
quantile(db21$p_indice, c(.25, .5, .75), na.rm = TRUE)

# Interquartile range of each dimension (printed to the console).
quantile(db21$t_indice, c(.75), na.rm = TRUE) -
  quantile(db21$t_indice, c(.25), na.rm = TRUE)
quantile(db21$p_indice, c(.75), na.rm = TRUE) -
  quantile(db21$p_indice, c(.25), na.rm = TRUE)

db21 %>%
  pivot_longer(t_indice:p_indice) %>%
  mutate(name = str_replace_all(name, c("t_indice" = "Transparencia",
                                        "p_indice" = "Participación"))) %>%
  ggplot(aes(y = name,
             x = value,
             group = name)) +
  geom_boxplot() +
  geom_violin(fill = "transparent") +
  geom_jitter(height = .2, alpha = .2) +
  scale_x_continuous(breaks = seq(0, 1, .2)) +
  labs(y = "Dimensión\n",
       x = "Valor de cada sujeto obligado") +
  theme_light() +
  theme(legend.position = "right",
        legend.text = element_text(size = 12),
        axis.title = element_text(size = 10, face = "bold"),
        axis.title.y = element_text(color = "grey0"),
        plot.title = element_text(size = 18, face = "bold"),
        plot.caption = element_markdown(hjust = 0),
        text = element_text(family = "A"))

# No `plot` argument: ggsave() saves the last plot created, i.e. the one above.
ggsave("3_viz/g5.png",
       width = 6,
       height = 4,
       dpi = 900)


### G6----
# G6: one line per id joining its 2021 participation and transparency values,
# faceted by which of the two is higher.

db21 <- db %>%
  mutate(id = paste(estado, tipo, so, sep = "_")) %>%
  select(id, estado, so, tipo, t_indice, p_indice) %>%
  mutate(year = 2021)

db21 %>%
  select(id, t_indice, p_indice) %>%
  # Facet label. Ties (t_indice == p_indice) fall in the participation panel,
  # and a missing comparison stays NA, exactly as before.
  mutate(color = if_else(t_indice > p_indice,
                         "Mejor en transparencia",
                         "Mejor en participación")) %>%
  # To tabulate the share of each group instead of plotting:
  # count(color) %>% mutate(sum = sum(n)) %>% mutate(porc = n/sum)
  #
  # A single pivot_longer() replaces the former pivot_longer -> pivot_wider on
  # `year` -> clean_names -> rename chain, whose net effect was only to name
  # the columns `var` and `value`. The former `filter(is.numeric(value))` was
  # removed as well: is.numeric() returns a single TRUE for the whole column,
  # so it never dropped a row.
  pivot_longer(t_indice:p_indice, names_to = "var") %>%
  mutate(var = str_replace_all(var, c("t_indice" = "Transparencia",
                                      "p_indice" = "Participación"))) %>%
  ggplot(aes(y = value,
             x = var,
             group = id)) +
  facet_wrap(~color) +
  geom_line(alpha = 0.05, col = "grey0") +
  labs(x = "Dimensión",
       y = "Valor en 2021\n") +
  theme_light() +
  theme(legend.position = "right",
        legend.text = element_text(size = 12),
        axis.title = element_text(size = 10, face = "bold"),
        axis.title.y = element_text(color = "grey0"),
        plot.title = element_text(size = 18, face = "bold"),
        plot.caption = element_markdown(hjust = 0),
        text = element_text(family = "A"),
        strip.text.x = element_text(size = 12, face = "bold",
                                    color = "grey0"))

# No `plot` argument: ggsave() saves the last plot created, i.e. the one above.
ggsave("3_viz/g6.png",
       width = 6,
       height = 4,
       dpi = 900)


### G7----
# G7: distribution of seven 2021 component / sub-index columns, ordered on the
# y axis by their median.

# Axis label of each column, keyed by column name. The labels used to be given
# by position, so they silently depended on the median order computed from the
# data.
# NOTE(review): the name -> label pairing is inferred from the column
# abbreviations (tg/tc prefix; ai, da, ta, tp suffix) - TODO confirm it matches
# the previously published figure.
g7_labels <- c(
  tgda_indice = "Datos abiertos",
  tctp_indice = "Transparencia proactiva",
  pg_indice = "PG",
  pc_indice = "PC",
  tgta_indice = "Transparencia activa",
  tcai_indice = "Acceso a la info. (TC)",
  tgai_indice = "Acceso a la info. (TG)"
)

db %>%
  select(estado, so, tipo,
         tgai_indice, tgta_indice, tgda_indice,
         tcai_indice, tctp_indice,
         pg_indice, pc_indice) %>%
  pivot_longer(tgai_indice:pc_indice) %>%
  # The unused `num = rep(1:7, 1365)` column was removed: it hard-coded the
  # number of rows of `db` and made mutate() fail for any other row count.
  group_by(name) %>%
  mutate(mediana = median(value, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = value,
             y = fct_reorder(name, mediana))) +
  geom_boxplot(fill = "transparent", show.legend = FALSE) +
  geom_violin(fill = "transparent") +
  # Point size encodes how many observations share the same value.
  geom_count() +
  scale_x_continuous(limits = c(0, 1),
                     breaks = seq(0, 1, .2)) +
  scale_y_discrete(labels = g7_labels) +
  labs(x = "Valor en 2021",
       y = NULL,
       subtitle = NULL,
       color = NULL,
       size = "Número de\nobservaciones\ncon ese valor") +
  theme_light() +
  theme(legend.position = "top",
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        plot.title = element_text(size = 18, face = "bold"),
        plot.caption = element_markdown(hjust = 0),
        axis.text.y = element_text(size = 9, face = "bold"),
        text = element_text(family = "A")) +
  # "none" replaces the deprecated `FALSE`.
  guides(color = "none")

# No `plot` argument: ggsave() saves the last plot created, i.e. the one above.
ggsave("3_viz/g7.png",
       width = 7,
       height = 6,
       dpi = 900)

