
#--------------------------------------------
########## Figure 2 ##########
#--------------------------------------------

library(plm)
library(lmtest)
library(sandwich)
library(fixest)
library(foreign)
library(foreach)
library(ggplot2)
library(cowplot)
library(ggtext)
library(scales)
library(modelsummary)
library(dplyr)

######  A #######
data<-read.csv("./Replication_AQ_EV/Data/dataset_with_logchargePs.csv", sep=',', header=TRUE)

data <- data[!is.na(data$temperature), ]
data <- data[!is.na(data$logchargePs), ]


model_linear <- lm(logchargePs ~ temperature, data = data)
model_quadratic <- lm(logchargePs ~ temperature + I(temperature^3), data = data)
data$yhat_linear <- predict(model_linear, newdata = data)
data$yhat_quadratic <- predict(model_quadratic, newdata = data)
X_linear <- model.matrix(model_linear)
X_quadratic <- model.matrix(model_quadratic)

cov_matrix_linear <- vcovCL(model_linear, cluster = ~ id_month)
cov_matrix_quadratic <- vcovCL(model_quadratic, cluster = ~ id_month)
data$stdp_linear <- sqrt(rowSums((X_linear %*% cov_matrix_linear) * X_linear))
data$stdp_quadratic <- sqrt(rowSums((X_quadratic %*% cov_matrix_quadratic) * X_quadratic))

data$yhat_lwr_linear <- data$yhat_linear - 1.96 * data$stdp_linear
data$yhat_upr_linear <- data$yhat_linear + 1.96 * data$stdp_linear
data$yhat_lwr_quadratic <- data$yhat_quadratic - 1.96 * data$stdp_quadratic
data$yhat_upr_quadratic <- data$yhat_quadratic + 1.96 * data$stdp_quadratic


plot_line <- ggplot(data, aes(x = temperature)) +
  geom_line(aes(y = yhat_linear, color = "Linear"), size = 1.2) +
  geom_ribbon(aes(ymin = yhat_lwr_linear, ymax = yhat_upr_linear), alpha = 0.6, fill = "pink") +
  geom_line(aes(y = yhat_quadratic, color = "Cubic"), size = 1.2) +
  geom_ribbon(aes(ymin = yhat_lwr_quadratic, ymax = yhat_upr_quadratic), alpha = 0.3, fill = "lightblue") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  scale_color_manual(values = c("Linear" = "red", "Cubic" = "blue"),
                     labels = c("Linear" = "Linear spline", "Cubic" = "Cubic spline"),
                     breaks = c("Linear", "Cubic")) +
  theme_minimal() +
  labs(title = expression(atop(bold("A. correlation between temperature"), bold("and EVCA per station"))),
    x = NULL, y = "EVCA Per Station (kWh)", color = "Model Type") +
  theme(
    plot.title = element_text(size = 28, hjust = 0.2),
    axis.title.x = element_blank(), 
    axis.text.x = element_blank(), 
    axis.ticks.x = element_blank(),
    panel.grid = element_blank(), 
    axis.line.y = element_line(color = "darkgrey", size = 0.8), 
    axis.line.x = element_blank(),
    axis.ticks.y = element_line(color = "black", size = 0.8),
    axis.title.y = element_text(size = 26),  
    axis.text.y = element_text(size = 26),
    legend.position = c(0.35, 0.8), 
    legend.title = element_text(size = 26),
    legend.text = element_text(size = 26)
  )


plot_hist <- ggplot(data, aes(x = temperature)) +
  geom_histogram(binwidth = 1, fill = "#8B7500", color = "darkgrey") + 
  theme_minimal() +
  labs(x = "Weekly average temperature (°C)", y = NULL) +
  scale_x_continuous(labels = function(x) paste0(x, "")) +
  theme(
    axis.title.y = element_blank(), 
    axis.text.y = element_blank(), 
    axis.ticks.y = element_blank(),
    panel.grid = element_blank(),
    axis.line.x = element_line(color = "darkgrey", size = 0.8),
    axis.line.y = element_blank(),
    axis.title.x = element_text(size = 28), 
    axis.text.x = element_text(size = 25) 
  )

temperature_plot <- plot_grid(plot_line, plot_hist, align = "v", axis = "lr", ncol = 1, rel_heights = c(3, 1.0))




######  C+D #######
df<-read.csv("./Replication_AQ_EV/Data/dataset_compre.csv", sep=',', header=TRUE)
df <- df[!is.na(df$temperature), ]
df <- df[!is.na(df$raining), ]
models <- list()
temperature_ranges <- list(
  "Model1" = df[df$temperature <= -7, ],
  "Model2" = df[df$temperature > -7 & df$temperature <= 7, ],
  "Model3" = df[df$temperature > 7 & df$temperature <= 17, ],
  "Model4" = df[df$temperature > 17 & df$temperature <= 30, ],
  "Model5" = df[df$temperature > 30, ]
)

####### NO2 #######
for (i in 1:length(temperature_ranges)) {
  model_name <- names(temperature_ranges)[i]
  temp_df <- temperature_ranges[[i]]
  
  model <- feols(log_NO2 ~ temperature + raining + wind + I(temperature^2) + I(raining^2) + I(wind^2) | id^month_only + province_id^year_only | log_Total ~ log_cumulative_plugs, 
                 cluster = ~id_month, 
                 data = temp_df)
  
  models[[model_name]] <- model
}

coefs <- lapply(seq_along(models), function(i) {
  model <- models[[i]]
  coef <- coef(model)[1] 
  ci <- confint(model)[1, ] 
  data.frame(
    Model = names(models)[i],
    Estimate = coef,
    Lower = ci[1],
    Upper = ci[2]
  )
})

coefs <- do.call(rbind, coefs)
coefs$Temperature <- factor(c("< -7", "[-7, 7]", "[7, 17]", "[17, 30]", "> 30"), 
                            levels = c("< -7", "[-7, 7]", "[7, 17]", "[17, 30]", "> 30"))
names(coefs) <- c("Model", "Estimate", "Lower", "Upper", "Temperature")

coef_plot <- ggplot(coefs, aes(x = Temperature, y = Estimate)) +
  geom_point(size = 6, color = "#F8766D", shape = 16) +
  geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.2, color = "#F8766D", size = 1.5) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  geom_vline(xintercept = 0, linetype = "solid", color = "black") +
  geom_label(aes(x = 3.0, y = -0.045, label = "Below −7 degree, decrease\nby 0.045% (p-val.<0.001)"), size = 8.5, color = "black", fill = "white", label.size = 0) +
  theme_minimal(base_size = 15) +
  ggtitle(expression(atop(bold("B. temperature heterogeneity in"), bold("the estimated effects on NO"["2"]*"")))) +
  xlab("Temperature") +
  ylab(expression(atop("Estimated", "effects on NO"["2"]*" (%)"))) +
  theme(
    axis.line.y = element_line(color = "black"),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(), 
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(size = 26, face = "bold"),
    axis.text.y = element_text(size = 25),
    axis.ticks.y = element_line(color = "black"),
    plot.title = element_text(size = 28, hjust = -0.5),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )


histogram_plot <- df %>%
  mutate(TemperatureRange = cut(temperature, 
                                breaks = c(-Inf, -7, 7, 17, 30, Inf), 
                                labels = c("<-7", "[-7, 7)", "[7, 17)", "[17, 30]", ">30"),
                                right = FALSE)) %>% 
  ggplot(aes(x = TemperatureRange)) +
  geom_bar(fill = "#8B7500", color = "darkgrey") +
  scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
  theme_minimal() +
  xlab("Temperature (°C)") +
  ylab("Count") +
  theme(
    axis.line.x = element_line(color = "black"),
    axis.title = element_text(size = 28),
    axis.text = element_text(size = 25),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.title = element_text(size = 26),
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(),
    axis.title.x = element_text(margin = margin(t = 10))
  )

no2_plot <- plot_grid(coef_plot, histogram_plot, align = "v", ncol = 1, rel_heights = c(3, 1.2))



# Fit models for each temperature range
for (i in 1:length(temperature_ranges)) {
  model_name <- names(temperature_ranges)[i]
  temp_df <- temperature_ranges[[i]]
  
  model <- feols(log_PM25 ~ temperature + raining + wind + I(temperature^2) + I(raining^2) + I(wind^2) | id^month_only + province_id^year_only | log_Total ~ log_cumulative_plugs, 
                 cluster = ~id_month, 
                 data = temp_df)
  
  models[[model_name]] <- model
}

coefs <- lapply(seq_along(models), function(i) {
  model <- models[[i]]
  coef <- coef(model)[1]
  ci <- confint(model)[1, ] 
  data.frame(
    Model = names(models)[i],
    Estimate = coef,
    Lower = ci[1],
    Upper = ci[2]
  )
})

coefs <- do.call(rbind, coefs)
coefs$Temperature <- factor(c("< -7", "[-7, 7]", "[7, 17]", "[17, 30]", "> 30"), 
                            levels = c("< -7", "[-7, 7]", "[7, 17]", "[17, 30]", "> 30"))
names(coefs) <- c("Model", "Estimate", "Lower", "Upper", "Temperature")

coef_plot <- ggplot(coefs, aes(x = Temperature, y = Estimate)) +
  geom_point(size = 6, color = "#00BFC4", shape = 16) + 
  geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.2, color = "#00BFC4", size = 1.5) + 
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") + 
  geom_vline(xintercept = 0, linetype = "solid", color = "black") + 
  geom_label(aes(x = 3.0, y = -0.04, label = "Below −7 degree, decrease\nby 0.04% (p-val.<0.001)"), size = 8.5, color = "black", fill = "white", label.size = 0) +
  theme_minimal(base_size = 15) +
  ggtitle(expression(atop(bold("C. temperature heterogeneity in"), bold("the estimated effects on PM"["2.5"]*"")))) +
  xlab("Temperature") + 
  ylab(expression(atop("Estimated", "effects on PM"["2.5"]*" (%)"))) + 
  theme(
    axis.line.y = element_line(color = "black"),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(), 
    axis.ticks.x = element_blank(), 
    axis.title.y = element_text(size = 26), 
    axis.text.y = element_text(size = 25),
    axis.ticks.y = element_line(color = "black"),
    plot.title = element_text(size = 28, hjust = -0.5), 
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )


histogram_plot <- df %>%
  mutate(TemperatureRange = cut(temperature, 
                                breaks = c(-Inf, -7, 7, 17, 30, Inf), 
                                labels = c("<-7", "[-7, 7)", "[7, 17)", "[17, 30)", ">30"),
                                right = FALSE)) %>% 
  ggplot(aes(x = TemperatureRange)) +
  geom_bar(fill = "#8B7500", color = "darkgrey") +
  scale_y_continuous(expand = expansion(mult = c(0, 0.2))) + 
  theme_minimal() +
  xlab("Temperature (°C)") +
  ylab("Count") +
  theme(
    axis.line.x = element_line(color = "black"),
    axis.title = element_text(size = 28),
    axis.text = element_text(size = 25),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.title = element_text(size = 26),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.x = element_text(margin = margin(t = 10))
  )

pm25_plot <- plot_grid(coef_plot, histogram_plot, align = "v", ncol = 1, rel_heights = c(3, 1.2))




######  Combine All #######
library(cowplot)
plot_A <- temperature_plot 
plot_B <- no2_plot 
plot_C <- pm25_plot
combined_plot <- ggdraw() +
  draw_plot(plot_A, x = 0.01, y = 0, width = 0.25, height = 0.95) +
  draw_plot(plot_B, x = 0.296, y = 0, width = 0.35, height = 0.95) +
  draw_plot(plot_C, x = 0.65, y = 0, width = 0.35, height = 0.95)

ggsave("Figure2.svg", plot = combined_plot, width = 22.5, height = 8, device = "svg")





