Big Corpus – Size 1000, step 200, with punct
# Prepare data
# NOTE(review): the section title above says "with punct", but the file read
# here is the "nopunct" variant -- confirm which one is intended.
dists <- read.csv("FINAL_PREDICTIONS_s1000-step200-nopunct_BigData.csv")
# The first three columns are never plotted; every column from the 4th
# onwards is drawn as one curve.
stopifnot(ncol(dists) >= 4)
series_cols <- colnames(dists)[4:ncol(dists)]

# Get average point
# Column 2 is stripped of everything up to and including its first underscore;
# the remainder is split on "-" and the numeric parts are averaged, giving one
# x position per row (presumably the midpoint of a word window -- confirm).
window_bounds <- strsplit(sub(pattern = "^[^_]+\\_", "", dists[, 2]), "-")
x <- vapply(window_bounds, function(b) mean(as.numeric(b)), numeric(1))

# Optional section markers: column 1 = text to print, column 2 = x position.
#labels = read.csv("subdivisions.csv")

# Plot
# One colour per plotted column, kept as a one-column matrix indexed by column
# name so that later sections can look colours up by name.
colour_pool <- c(
  "dodgerblue2", "khaki2",
  "green4",
  "#6A3D9A", # purple
  "#FF7F00", # orange
  "gold1", "palegreen2",
  "skyblue2", "#FB9A99", # lt pink
  "#E31A1C", # red
  "#CAB2D6", # lt purple
  "black",
  "#FDBF6F", # lt orange
  "gray70",
  "maroon", "orchid1", "deeppink1", "blue1", "steelblue4",
  "darkturquoise", "green1", "yellow4", "yellow3",
  "darkorange4", "brown"
)
# rep_len() recycles the pool when there are more series than colours; plain
# indexing would yield NA colours and therefore invisible lines.
couls <- matrix(
  rep_len(colour_pool, length(series_cols)),
  ncol = 1,
  dimnames = list(series_cols, "color")
)

# Round the lower limit *down* to one decimal: round() could round up and
# clip the lowest curve.
y_lower <- floor(min(dists[, series_cols]) * 10) / 10

# The first series opens the plot, the remaining ones are added on top.
plot(
  x, smooth(dists[, series_cols[1]], kind = "3RSR"),
  type = "l", ylim = c(y_lower, 1), lty = 1,
  ylab = "decision function", xlab = "words",
  col = couls[series_cols[1], "color"], ljoin = 3
)
for (series_name in series_cols[-1]) {
  lines(
    x, smooth(dists[, series_name], kind = "3RSR"),
    col = couls[series_name, "color"], ljoin = 3
  )
}
legend(
  1, 1,
  legend = series_cols, col = couls[series_cols, "color"],
  lty = 1, ncol = 1, cex = 0.5
)

# `labels` only holds section markers if it was loaded (see the commented-out
# read above). Otherwise the name resolves to the base function labels() and
# subsetting it would abort the script, so the annotations are skipped.
if (is.data.frame(labels) || is.matrix(labels)) {
  abline(v = labels[, 2], col = "grey", lty = 2)
  text(x = labels[, 2], y = 0.5, labels[, 1], srt = 90, cex = 0.8)
}

Clean corpus – Size 1000, step 200, with punct
# Prepare data
# NOTE(review): the section title above says "with punct", but the file read
# here is the "nopunct" variant -- confirm which one is intended.
dists <- read.csv("FINAL_PREDICTIONS_s1000-step200-nopunct.csv")
# Optionally drop some series before plotting:
#dists = dists[, !colnames(dists) %in% c("ericTrump", "melaniaTrump", "GeneralFlynn")]
# The first three columns are never plotted; every column from the 4th
# onwards is drawn as one curve.
stopifnot(ncol(dists) >= 4)
series_cols <- colnames(dists)[4:ncol(dists)]

# Get average point
# Column 2 is stripped of everything up to and including its first underscore;
# the remainder is split on "-" and the numeric parts are averaged, giving one
# x position per row (presumably the midpoint of a word window -- confirm).
window_bounds <- strsplit(sub(pattern = "^[^_]+\\_", "", dists[, 2]), "-")
x <- vapply(window_bounds, function(b) mean(as.numeric(b)), numeric(1))

# Optional section markers: column 1 = text to print, column 2 = x position.
#labels = read.csv("subdivisions.csv")

# Plot
# Colours are looked up by column name in `couls`, the palette matrix built in
# the previous section, so a series keeps its colour across plots. Fail early
# with a readable message if a column has no palette entry.
uncoloured <- setdiff(series_cols, rownames(couls))
if (length(uncoloured) > 0) {
  stop(
    "No colour defined in 'couls' for: ",
    paste(uncoloured, collapse = ", "),
    call. = FALSE
  )
}

# Round the lower limit *down* to one decimal: round() could round up and
# clip the lowest curve.
y_lower <- floor(min(dists[, series_cols]) * 10) / 10

# The first series opens the plot, the remaining ones are added on top.
plot(
  x, smooth(dists[, series_cols[1]], kind = "3RSR"),
  type = "l", ylim = c(y_lower, 1), lty = 1,
  ylab = "decision function", xlab = "words",
  col = couls[series_cols[1], "color"], ljoin = 3
)
for (series_name in series_cols[-1]) {
  lines(
    x, smooth(dists[, series_name], kind = "3RSR"),
    col = couls[series_name, "color"], ljoin = 3
  )
}
legend(
  1, 1,
  legend = series_cols, col = couls[series_cols, "color"],
  lty = 1, ncol = 1, cex = 0.8
)

# `labels` only holds section markers if it was loaded (see the commented-out
# read above). Otherwise the name resolves to the base function labels() and
# subsetting it would abort the script, so the annotations are skipped.
if (is.data.frame(labels) || is.matrix(labels)) {
  abline(v = labels[, 2], col = "grey", lty = 2)
  text(x = labels[, 2], y = 0.5, labels[, 1], srt = 90, cex = 0.8)
}
