# set working directory (see https://stackoverflow.com/a/35842119)
# The script reads and writes its CSV/TXT files via relative paths, so the
# working directory is switched to the directory that contains this script.
dir <- tryCatch({
  # script being sourced
  # getSrcDirectory() needs an object carrying source references; a function
  # defined right here records the file this script was sourced from.
  src_dir <- utils::getSrcDirectory(function() NULL)[1]
  if (is.na(src_dir) || !nzchar(src_dir)) {
    # No source reference was recorded (e.g. code sent to the console line
    # by line), so hand over to the RStudio fallback below.
    stop("source directory is not available", call. = FALSE)
  }
  src_dir
}, error = function(e) {
  # script being run in RStudio
  dirname(rstudioapi::getActiveDocumentContext()$path)
})
setwd(dir)

library("stringr")  
library("sqldf")

########################################

# Read one of the exported CSV files with the settings shared by all inputs.
#
# path:    file name of the CSV export, relative to the working directory.
# id_cols: names of the columns holding tweet/user IDs. They are read as
#          character so that long numeric IDs are not converted to doubles
#          (which cannot represent integers above 2^53 exactly and print in
#          scientific notation when pasted into a link).
#
# Returns a data.frame; empty strings and the literal "null" become NA.
read_export <- function(path, id_cols) {
  read.csv(
    path,
    header = TRUE,
    sep = ",",
    quote = "\"",
    strip.white = TRUE,
    encoding = "UTF-8",
    na.strings = c("", "null"),
    stringsAsFactors = FALSE,
    colClasses = setNames(rep("character", length(id_cols)), id_cols)
  )
}

tweet <- read_export("tweet.csv", id_cols = c("id", "author_id"))
nrow(tweet)
# 1

replies <- read_export(
  "replies.csv",
  id_cols = c("id", "author_id", "in_reply_to_tweet_id")
)
nrow(replies)
# 476

users <- read_export("expanded_users.csv", id_cols = c("id", "tweet_id"))
nrow(users)
# 364

# Combine the original tweet and all replies (ordered by creation time) into
# one lookup table used by the thread-rendering functions below.
data <- tweet[, c("id", "author_id", "created_at", "text")]
# The original tweet has no parent; its own id is stored as the parent id so
# that the column exists for rbind().
data$in_reply_to_tweet_id <- tweet$id
# rbind() on data frames matches columns by name, so the differing column
# order of the two operands is fine.
data <- rbind(
  data,
  replies[
    order(replies$created_at),
    c("id", "author_id", "in_reply_to_tweet_id", "created_at", "text")
  ]
)

# Render one tweet from the global `data` table as a block of text.
#
# tweet_id: id of the tweet to render; looked up in `data$id`.
# depth:    nesting level; every level indents the block by four spaces.
#
# Returns a string made of the creation time, a link to the tweet and the
# tweet text, each on its own line and followed by a trailing newline. Only
# the first line of a multi-line tweet text receives the indentation.
tweet_to_string <- function(tweet_id, depth=0) {
  indent <- strrep(" ", 4 * depth)
  # which() drops NA comparisons; subsetting with the raw logical vector
  # would add an all-NA phantom row for every NA id in `data`.
  row <- data[which(data$id == tweet_id), , drop = FALSE]
  created_at <- paste0(indent, row$created_at)
  tweet_link <- paste0(
    indent, "https://twitter.com/", trimws(row$author_id),
    "/status/", tweet_id
  )
  text <- paste0(indent, row$text)
  paste(created_at, tweet_link, text, "", sep = "\n")
}

# Render a tweet followed by all of its direct and indirect replies.
#
# tweet_id: id of the tweet at the top of the (sub)thread.
# depth:    nesting level of that tweet; replies are rendered one level
#           deeper than the tweet they answer.
#
# Returns the text produced by tweet_to_string() for the tweet, followed by
# the recursively rendered replies in the row order of the global `data`
# table, joined by newlines.
get_replies_for_tweet <- function(tweet_id, depth) {
  result <- tweet_to_string(tweet_id, depth)
  # which() ignores NA comparisons. Rows that reply to themselves (the
  # original tweet is stored that way) are skipped, otherwise the recursion
  # would never terminate.
  is_reply <- data$in_reply_to_tweet_id == tweet_id & data$id != tweet_id
  reply_ids <- data$id[which(is_reply)]
  for (reply_id in reply_ids) {
    result <- paste(
      result,
      get_replies_for_tweet(reply_id, depth + 1),
      sep = "\n"
    )
  }
  result
}

# Render every reply as the top of its own thread and collect the text.
# NOTE(review): every reply id starts a thread at depth 0, so a reply that
# answers another reply is rendered both on its own and nested below its
# parent - confirm that this duplication is intended.
tweet_ids <- unique(replies$id)
txt_output <- character()
# Iterating over the ids directly skips the loop when there are no replies;
# 1:length(tweet_ids) would run for the indices 1 and 0 in that case.
for (tweet_id in tweet_ids) {
  txt_output <- paste(
    txt_output,
    get_replies_for_tweet(tweet_id, 0),
    "",
    sep = "\n"
  )
}
#cat(txt_output)

write.table(txt_output, file="replies.txt", sep=",", col.names=FALSE, row.names=FALSE, na="", quote=FALSE, qmethod="double", fileEncoding="UTF-8")

# export user data
# One status link per row, built from the row's `id` and `tweet_id` columns.
users$tweet_link <- paste0(
  "https://twitter.com/",
  trimws(users$id),
  "/status/",
  users$tweet_id
)

# Collapse rows that share the same profile fields into a single row and
# gather their links into one `tweet_links` value.
unique_users <- sqldf(
  "SELECT created_at, username, name, description, location, website, group_concat(tweet_link) as tweet_links FROM users GROUP BY created_at, username, name, description, location, website"
)

# Write the de-duplicated users as a quoted, comma-separated file.
write.table(
  unique_users,
  file = "users.csv",
  sep = ",",
  col.names = TRUE,
  row.names = FALSE,
  na = "",
  quote = TRUE,
  qmethod = "double",
  fileEncoding = "UTF-8"
)


