library(ggplot2)
library(mgcv)
library(hash)

# Lookup table from metric column name to the human-readable label used in
# the analysis summary. Each entry is written as 'column name' = 'label' so
# that a key and its description sit on the same line.
metricNames <- local({
  labels <- c(
    'MidentifierLength'  = 'Mean identifier length',
    'AMidentifierLength' = 'Average median identifier length',
    'MfileLen'           = 'Mean file length (lines)',
    'Dinternal'          = 'Internally visible declaration density',
    'MfileStatement'     = 'Mean file functionality (statements)',
    'Dconst'             = 'const keyword density',
    'Dunsigned'          = 'unsigned keyword density',
    'MlineLen'           = 'Mean line length (characters)',
    'AMlineLen'          = 'Average median line length (characters)',
    'Dregister'          = 'register keyword density',
    'Drestrict'          = 'restrict keyword density',
    'Dvoid'              = 'void keyword density',
    'Dkludge'            = 'Kludge word density',
    'Dvolatile'          = 'volatile keyword density',
    'Denum'              = 'enum keyword density',
    'Inconsistency'      = 'Formatting inconsistency',
    'McommentSize'       = 'Mean comment size',
    'SDindentation'      = 'Indentation spaces standard deviation',
    'Dstatement'         = 'Statement density',
    'MindentationSpace'  = 'Mean indentation spaces',
    'AMindentationSpace' = 'Average median indentation spaces',
    'Dgoto'              = 'goto keyword density',
    'DcommentChar'       = 'Comment character density',
    'MfunLen'            = 'Mean function length (lines)',
    'CcppConditional'    = 'C preprocessor conditional statement density',
    'DcppInclude'        = 'C preprocessor include statement density',
    'Mnesting'           = 'Mean statement nesting',
    'AMnesting'          = 'Average median statement nesting',
    'Dcomment'           = 'Comment density',
    'Dsigned'            = 'signed keyword density',
    'Dinline'            = 'inline keyword density',
    'Dnoalias'           = 'noalias keyword density',
    'DcppNoInclude'      = 'C preprocessor non-include statement density'
  )
  hash(keys=names(labels), values=unname(labels))
})

# Fit a smooth (GAM, shrinkage cubic regression spline) of one metric against
# release date, save a scatter plot with the fitted curve and a band of
# +/- one standard error, and return the fitted model.
#
# Args:
#   metricName:    name of the column of df that holds the metric values.
#   df:            data frame containing that column and a ReleaseDate column.
#   outputDir:     directory handed to ggsave() as the output path.
#   miniature:     if TRUE, draw a thumbnail (larger points, black curve,
#                  darker band, no axis titles/text/ticks) and save it as
#                  "<metricName>-m.pdf"; otherwise save "<metricName>.pdf".
#   predictPoints: number of evenly spaced dates at which the fit is evaluated
#                  for drawing the curve and the band.
#
# Returns:
#   The model object returned by gam().
analyseMetric <- function(metricName, df, outputDir, miniature=FALSE, 
                          predictPoints=80) {
  values <- df[[metricName]]
  mini <- isTRUE(miniature)

  # The smoother needs a numeric predictor, so dates are converted to numbers.
  fit <- gam(y ~ s(x, bs="cs"),
             data=data.frame(y=values, x=as.numeric(df$ReleaseDate)))

  # Evaluate the fit on a regular grid that spans the observed dates.
  dateRange <- range(df$ReleaseDate, na.rm=TRUE)
  xseq <- seq(dateRange[1], dateRange[2], length.out=predictPoints)
  predicted <- predict(fit, data.frame(x=as.numeric(xseq)), se.fit=TRUE)

  # One frame serves both the fitted line (y) and the error band (lwr, upr).
  predframe <- data.frame(x=xseq, y=predicted$fit,
                          lwr=predicted$fit - predicted$se.fit,
                          upr=predicted$fit + predicted$se.fit)

  p <- ggplot(data.frame(y=values, x=df$ReleaseDate), aes(x, y))

  if (mini) {
    p <- p + geom_point(size=5) +
      geom_line(aes(x, y), data=predframe, colour="black") +
      geom_ribbon(data=predframe, aes(ymin=lwr, ymax=upr), alpha=0.5)
  } else {
    p <- p + geom_point() +
      geom_line(aes(x, y), data=predframe, colour="blue") +
      geom_ribbon(data=predframe, aes(ymin=lwr, ymax=upr), alpha=0.3)
  }

  output.basename <- metricName
  if (mini) {
    p <- p + theme_bw() + theme(axis.title=element_blank(),
                                axis.text=element_blank(),
                                axis.ticks=element_blank())
    output.basename <- paste(output.basename, "m", sep="-")
  } else {
    # Tilt the y-axis labels only when the whole metric lies at or below 1.
    # The comparison must be applied to the maximum itself; taking max() of
    # the comparison would be true as soon as a single value is <= 1.
    rot.angle <- if (max(values, na.rm=TRUE) <= 1) -45 else 0

    p <- p + theme(axis.title.x=element_blank(), 
                   axis.title.y=element_blank(),
                   axis.text.x=element_text(size=18),
                   axis.text.y=element_text(size=18, angle=rot.angle))
  }

  plotFilename <- paste(output.basename, "pdf", sep=".")
  ggsave(plot=p, filename=plotFilename, path=outputDir, device=cairo_pdf)
  fit
}

# Plot one metric at full size and report how well the smooth fits it.
#
# Args:
#   metric:     name of the metric column in df.
#   df:         data frame passed through to analyseMetric().
#   output.dir: directory in which the plot is saved.
#
# Returns:
#   A two-element vector: the metric name followed by the r.sq value taken
#   from the model summary. Because c() combines a string with a number,
#   both elements are character strings.
analyseSummariseMetric <- function(metric, df, output.dir) {
  model <- analyseMetric(metric, df, output.dir)
  c(metric, summary(model)$r.sq)
}

# Input and output directories are siblings of the current working directory.
series.dir <- file.path(dirname(getwd()), "series")
output.dir <- file.path(dirname(getwd()), "analysis")
# NOTE(review): the pattern is not anchored with "$", so names that merely
# contain ".txt" (e.g. "x.txt.bak") are also picked up -- confirm intended.
files <- dir(series.dir, pattern = '\\.txt', full.names = TRUE)
# Read every series file and stack the rows into a single data frame.
tables <- lapply(files, read.table)
df <- do.call(rbind, tables)
# The series files are read without a header line; column names are taken
# from the header file, which is looked up in the current working directory.
headers <- read.table("series-header.tab", header=TRUE)
colnames(df) <- colnames(headers)
# Assumes the header file names a "Date" column that as.Date() can parse.
df$ReleaseDate <- as.Date(df$Date)

# Metric columns: everything from the fourth column up to, but excluding, the
# ReleaseDate column appended above.
# NOTE(review): presumably the first three columns are non-metric fields
# (including Date) -- confirm against series-header.tab.
metrics <- colnames(df)[4:(length(colnames(df)) - 1)]

# Command-line dispatch:
#   -m         render only the miniature plot of every metric;
#   otherwise  render the full-size plots and write a summary of the metrics
#              ranked by the R-squared of their fitted smooth.
cl.args <- commandArgs(TRUE)

if (length(cl.args) >= 1 && cl.args[1] == '-m') {
  lapply(metrics, analyseMetric, df, output.dir, miniature=TRUE)
} else {
  # Each element is a character vector c(metric name, R-squared).
  all.adj.r.squared <- lapply(metrics, analyseSummariseMetric, df, output.dir)

  # The R-squared values arrive as strings, so they are converted to numbers
  # before ranking. Ordering the strings directly would misplace negative
  # values and values printed in scientific notation (e.g. "1e-04").
  ord <- order(vapply(all.adj.r.squared, function(x) as.numeric(x[2]),
                      numeric(1)),
               decreasing=TRUE)

  # One row per metric: column name, readable label (falling back to the
  # column name when no label is registered), R-squared rounded to 2 places.
  summary.ord <- lapply(all.adj.r.squared[ord], function(x) { 
    if (has.key(x[1], metricNames)) {
      list(x[1], metricNames[[x[1]]], round(as.numeric(x[2]), 2))
    } else {
      list(x[1], x[1], round(as.numeric(x[2]), 2))
    }
  })

  print(summary.ord)
  write.table(do.call(rbind, summary.ord), file="analysis-summary.csv",
              sep=",", col.names=FALSE)
}
