library(ggplot2)
library(segmented)
library(splines)

# ---- Load the series tables ----
# Every *.txt table found in the "series" directory that sits next to the
# current working directory is read and stacked row-wise into one data frame.
series.dir <- file.path(dirname(getwd()), "series")

# Anchor the pattern so that only names ending in ".txt" are picked up
# (an unanchored "\\.txt" also matches e.g. "a.txt.bak").
files <- dir(series.dir, pattern = "\\.txt$", full.names = TRUE)

# Fail early with a clear message: with no input files, rbind-ing an empty
# list yields NULL and the column-name assignment below fails obscurely.
if (length(files) == 0) {
  stop("No .txt series files found in ", series.dir, call. = FALSE)
}

tables <- lapply(files, read.table)
df <- do.call(rbind, tables)

# The data files are read without a header line; the column names are taken
# from a separate header file in the current working directory.
headers <- read.table("series-header.tab", header = TRUE)
colnames(df) <- colnames(headers)

# Assumes the header file names a `Date` column in a format as.Date() can
# parse by default -- TODO confirm against series-header.tab.
df$ReleaseDate <- as.Date(df$Date)

# ---- Plain linear fit of MfunLen against release date ----

# Fit the model up front; `my.lm` is reused further down as the starting
# point for the segmented fit and for the Davies test.
my.lm <- lm(MfunLen ~ ReleaseDate, data = df)
my.lm.summary <- summary(my.lm)

# Print the model summary.
my.lm.summary

# Scatter plot with the fitted regression line and an R^2 annotation.
# The label is a plotmath expression (parse = TRUE), hence the "==".
p <- ggplot(df, aes(x = ReleaseDate, y = MfunLen)) +
  geom_point() +
  geom_smooth(method = lm) +
  annotate(
    "text",
    x = as.Date("1990-01-01"),
    y = 45,
    label = paste("R^2 ==", signif(my.lm.summary$r.squared, 3)),
    parse = TRUE
  )

ggsave(plot = p, filename = "MfunLen_Linear.pdf")

# ---- Segmented (broken-line) regression with a confidence ribbon ----

# Refit the linear model allowing one breakpoint in ReleaseDate.
# NOTE(review): ReleaseDate is a Date, so the starting value psi = 1990 is
# presumably read on the numeric day scale (about mid-1975), not as the
# calendar year 1990 -- confirm that this starting guess is intended.
my.seg <- segmented(my.lm, seg.Z = ~ReleaseDate, psi = 1990)

summary(my.seg)

# Fitted values of the segmented model, paired with their dates for plotting.
my.fitted <- fitted(my.seg)
my.model <- data.frame(ReleaseDate = df$ReleaseDate, MfunLen = my.fitted)

# Second column of the psi matrix (presumably the estimated breakpoint --
# verify against the segmented documentation), converted back to a Date.
my.lines <- my.seg$psi[, 2]
min.ReleaseDate <- min(df$ReleaseDate)
intercept.ReleaseDate <- as.Date(my.lines, origin = "1970-01-01")

# Confidence bounds for the segmented fit (columns lwr / upr are used below).
fit <- as.data.frame(predict(my.seg, interval = "confidence"))

# Points, the broken-line fit in red, and the confidence band.
p <- ggplot(df, aes(x = ReleaseDate, y = MfunLen)) +
  geom_point() +
  geom_line(
    data = my.model,
    aes(x = ReleaseDate, y = MfunLen),
    colour = "tomato"
  ) +
  geom_ribbon(aes(ymin = fit$lwr, ymax = fit$upr), alpha = 0.3)

# Disabled: dashed vertical marker at the breakpoint.
# p <- p + geom_vline(xintercept = my.lines, linetype = "dashed")

# Disabled: text label at the breakpoint.
# p <- p + geom_text(aes(intercept.ReleaseDate, 0, label = "ASAS",
#                        size=2, hjust=1))

# Davies test on the linear model with ReleaseDate as the segmented variable.
davies.test(my.lm, ~ReleaseDate)

ggsave(plot = p, filename = "MfunLen_Ribbon.pdf")

# ---- Separate linear fits on either side of the estimated breakpoint ----

# Label each observation by the side of the breakpoint it falls on.
df$segment <- ifelse(df$ReleaseDate <= intercept.ReleaseDate, 1, 2)
df$segment <- factor(df$segment, levels = c(1, 2),
                     labels = c(paste("<= ", intercept.ReleaseDate),
                                paste("> ", intercept.ReleaseDate)))

# Scatter plot with one least-squares line per segment. Built with explicit
# layers instead of the deprecated qplot(), which forwarded `method` and
# `formula` to every geom (geom_point does not understand them).
p <- ggplot(df, aes(x = ReleaseDate, y = MfunLen, colour = segment)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x)

# One linear model per segment; the summaries supply the R^2 annotations.
regressions <- by(df, df$segment,
                  function(group) lm(MfunLen ~ ReleaseDate, data = group))
regressions.summary <- lapply(regressions, summary)

# R^2 of the first segment; the label is a plotmath expression.
p <- p + annotate("text", x = as.Date("1990-01-01"), y = 45,
                  label = paste("R^2 ==",
                                signif(regressions.summary[[1]]$r.squared, 3)),
                  parse = TRUE)

# R^2 of the second segment.
p <- p + annotate("text", x = as.Date("2005-01-01"), y = 50,
                  label = paste("R^2 ==",
                                signif(regressions.summary[[2]]$r.squared, 3)),
                  parse = TRUE)

p
ggsave(plot = p, filename = "MfunLen_Segment.pdf")

# ---- Linear B-spline fit (degree 1, 3 degrees of freedom) with ribbon ----

# Fit through `data = df` with bare column names. With `df$...` terms in the
# formula, predict() cannot match any column of `newdata` and silently falls
# back to the global data frame, so the prediction only worked by accident.
bs.fit <- lm(MfunLen ~ bs(ReleaseDate, degree = 1, df = 3), data = df)

# Fitted values and confidence bounds (columns fit / lwr / upr) for the
# observed release dates.
predicted <- as.data.frame(predict(bs.fit, newdata = df,
                                   interval = "confidence"))

# Keep the curve and its confidence bounds together in one plotting table so
# the layers below do not need `$` lookups inside aes().
spline.data <- data.frame(x = df$ReleaseDate, y = predicted$fit,
                          lwr = predicted$lwr, upr = predicted$upr)

# Points, the spline fit in red, and the confidence band. The ribbon does
# not inherit the plot-level mapping because spline.data has no MfunLen.
p <- ggplot(df, aes(ReleaseDate, MfunLen)) +
  geom_point() +
  geom_line(aes(x, y), data = spline.data, colour = "tomato") +
  geom_ribbon(aes(x = x, ymin = lwr, ymax = upr), data = spline.data,
              inherit.aes = FALSE, alpha = 0.3)

p
ggsave(plot = p, filename = "MfunLen_Spline.pdf")
