library(fst)
library(data.table)
library(ggplot2)
library(tidyverse)
library(ggrepel)

# Command-line interface: the first trailing argument is the input .fst file,
# the second is the path the finished plot is written to.
args <- commandArgs(trailingOnly = TRUE)

# Fail fast with a usage message rather than letting an NA path surface as an
# obscure error in read_fst() or, much later, in ggsave().
if (length(args) < 2L) {
  stop(
    "Expected two arguments: <input .fst path> <output plot path>",
    call. = FALSE
  )
}

input_path <- args[1]

output_path <- args[2]

# Load as a data.table; the column selection that follows uses data.table
# syntax (`with = ...`, `.SD`).
cleaned_df <- read_fst(input_path, as.data.table = TRUE)

# Centrality predictor columns that are plotted against the outcome "dv.io".
iv.vars <- c(
  "iv.pagerank.log", "iv.hits.log", "iv.cohits.log", "iv.bgrm.log",
  "iv.birank.log", "iv.hits.logged.mme", "iv.cohits.logged.mme",
  "iv.bgrm.logged.mme", "iv.birank.logged.mme"
)
tdf <- cleaned_df[, c("dv.io", iv.vars), with = FALSE]

# Display names, matched by position to c("dv.io", iv.vars).
names(tdf) <- c(
  "Overdose", "PageRank", "HITS", "CoHITS", "BGRM", "BiRank",
  "MME HITS", "MME CoHITS", "MME BGRM", "MME BiRank"
)

# Round every column (the outcome included) to two decimals and clamp it to
# [-1, 3], the x-range shown in the plot. pmin/pmax do the clamp in one
# vectorised pass and propagate NA the same way the ifelse() form did.
tdf <- tdf[, lapply(.SD, function(x) pmin(pmax(round(x, 2), -1), 3))]

# Long format: one row per (observation, centrality measure) with the measure
# name in `rank` and its value in `value`. The table is converted to a plain
# data.frame first so the reshape does not depend on data.table dispatch.
tmp <- tidyr::pivot_longer(
  as.data.frame(tdf),
  cols = -Overdose,
  names_to = "rank",
  values_to = "value"
)

# Working copy with generic column names (x = centrality value, y = outcome,
# z = measure name) used by the per-measure fits below.
d <- tmp
d$x <- d$value
d$y <- d$Overdose
d$z <- d$rank

# One data frame per centrality measure; the list names are the measure names
# in sorted (factor-level) order.
d_by_rank <- split(d, d$z)

# For each measure, fit y ~ x and evaluate the fitted line at the largest
# observed x. That point is where the line's text label is anchored.
labelInfo <- lapply(d_by_rank, function(t) {
  # lm() drops NA rows by default, so ignore them here too; otherwise a single
  # NA would turn the label position into NA.
  x_max <- max(t$x, na.rm = TRUE)
  fit <- lm(y ~ x, data = t)
  data.frame(
    predAtMax = predict(fit, newdata = data.frame(x = x_max)),
    max = x_max
  )
}) %>%
  bind_rows()

# Take the labels from the split list itself so each label is tied to the
# group its row was computed from.
labelInfo$label <- names(d_by_rank)

# One linear fit line per centrality measure, drawn in greyscale with distinct
# line types. The legend is switched off; each line is labelled directly by
# the repelled text layer at the end of the chain.
p <- ggplot(data = tmp, aes(x = value, y = Overdose, color = rank)) +
  labs(
    x = "Centrality (z-score)",
    y = "Predicted Probability of Opioid Overdose"
  ) +
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
  # left as `size` because the installed ggplot2 version is not visible here.
  stat_smooth(aes(linetype = rank), size = 1, se = FALSE, method = "lm") +
  scale_colour_grey(name = "Centrality Measure", start = .5, end = 0) +
  theme_bw() +
  theme(
    axis.line = element_line(colour = "black"),
    legend.position = "none",
    text = element_text(family = "serif"),
    title = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8.5),
    axis.text.y = element_text(size = 9),
    # Blank the root `line` element; axis.line is set explicitly above.
    line = element_blank(),
    # Wider right margin (second value) leaves room for the line labels.
    plot.margin = unit(c(1, 4, 1, 1), "lines"),
    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0))
  ) +
  # clip = "off" lets the labels be drawn outside the panel area.
  coord_cartesian(xlim = c(-1, 3), clip = "off") +
  scale_x_continuous(breaks = c(-1, 0, 1, 2, 3), limits = c(-1, 3)) +
  # Labels are anchored at the end of each fitted line (x = max,
  # y = predAtMax). xlim = c(4, NA) appears intended to push them to the
  # right of the panel, into the margin; confirm against ggrepel's docs.
  geom_text_repel(
    data = labelInfo,
    aes(
      x = max, y = predAtMax,
      label = label,
      color = label
    ),
    size = 2.5,
    nudge_x = .1,
    direction = "y",
    force = .25,
    xlim = c(4, NA)
  )

# Keep the save path in `tsave1` (as before), but assign it as its own
# statement instead of inside the ggsave() argument list.
tsave1 <- output_path
ggsave(tsave1, p, width = 8, height = 4.5, units = "in")