package backend.analysis.textualSimilarities;

import com.github.chen0040.data.utils.TupleTwo;
import com.github.chen0040.lda.Doc;
import com.github.chen0040.lda.Lda;
import com.github.chen0040.lda.LdaResult;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Pattern;

/**
 * Fits topic models over lists of comment strings using the {@code Lda}
 * implementation imported from {@code com.github.chen0040.lda}.
 *
 * <p>NOTE(review): both public entry points return {@code void} and do not
 * store the fitted {@link LdaResult}; the model is discarded when the method
 * returns. Expose the result (for example through a new method that returns
 * it) once a consumer for the topics exists.
 */
public class LDA {

    /** Topic count used by {@link #computeLDA(List)}. */
    private static final int COMPUTE_TOPIC_COUNT = 20;

    /** Vocabulary cap used by {@link #computeLDA(List)}. */
    private static final int COMPUTE_MAX_VOCABULARY_SIZE = 20000;

    /** Topic count used by {@link #topicCount(List)}. */
    private static final int FILTERED_TOPIC_COUNT = 1;

    /** Vocabulary cap used by {@link #topicCount(List)}. */
    private static final int FILTERED_MAX_VOCABULARY_SIZE = 200000;

    // Compiled once; String.replaceAll would recompile each regex for every comment.
    // "(?!^)" keeps a match at the very first character from getting a leading space.
    private static final Pattern BEFORE_UNDERSCORE = Pattern.compile("(?!^)(_)");
    private static final Pattern BEFORE_DIGIT = Pattern.compile("(?!^)([\\d])");
    private static final Pattern BEFORE_UPPERCASE = Pattern.compile("(?!^)([A-Z])");
    private static final Pattern NON_WORD_NON_SPACE = Pattern.compile("[^\\w\\s]");
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** Creates an instance; the class holds no state. */
    public LDA() {
    }

    /**
     * Fits a 20-topic model (vocabulary capped at 20000) on the comments exactly
     * as given; no preprocessing is applied.
     *
     * @param comments documents to fit the model on
     * @throws NullPointerException if {@code comments} is {@code null}
     */
    public void computeLDA(List<String> comments) {
        Objects.requireNonNull(comments, "comments");
        fitModel(comments, COMPUTE_TOPIC_COUNT, COMPUTE_MAX_VOCABULARY_SIZE);
    }

    /**
     * Preprocesses the comments with {@link #filterComments(List)} and fits a
     * single-topic model (vocabulary capped at 200000) on the result.
     *
     * @param comments raw comments to preprocess and fit the model on
     * @throws NullPointerException if {@code comments} or one of its elements is {@code null}
     */
    public void topicCount(List<String> comments) {
        Objects.requireNonNull(comments, "comments");
        fitModel(filterComments(comments), FILTERED_TOPIC_COUNT, FILTERED_MAX_VOCABULARY_SIZE);
    }

    /** Configures a fresh {@code Lda} with the given settings and fits it on {@code docs}. */
    private LdaResult fitModel(List<String> docs, int topicCount, int maxVocabularySize) {
        Lda method = new Lda();
        method.setTopicCount(topicCount);
        method.setMaxVocabularySize(maxVocabularySize);
        return method.fit(docs);
    }

    /**
     * Normalizes each comment into a space-separated string of stemmed,
     * non-stop-word tokens. The returned list has one entry per input comment,
     * in the same order; a comment with no surviving tokens yields {@code ""}.
     *
     * <p>Per comment: a space is inserted before every underscore, digit and
     * ASCII uppercase letter that is not the first character; every character
     * that is neither a word character nor whitespace is removed; the text is
     * lowercased; it is split on whitespace; stop words are dropped; and the
     * remaining tokens are stemmed.
     *
     * <p>NOTE(review): {@code _} is a regex word character, so it survives the
     * stripping step and stays attached to the token that follows it
     * ({@code "foo_bar"} yields the tokens {@code foo} and {@code _bar}).
     * Confirm whether that is intended.
     *
     * @param comments raw comments
     * @return one normalized string per input comment
     * @throws NullPointerException if {@code comments} or one of its elements is {@code null}
     */
    private List<String> filterComments(List<String> comments) {
        List<String> filteredComments = new ArrayList<>(comments.size());
        // The original already reused one stemmer across the words of a comment,
        // so a single instance per call is sufficient.
        PorterStemmer porterStemmer = new PorterStemmer();

        for (String comment : comments) {
            String separated = BEFORE_UNDERSCORE.matcher(comment).replaceAll(" $1");
            separated = BEFORE_DIGIT.matcher(separated).replaceAll(" $1");
            separated = BEFORE_UPPERCASE.matcher(separated).replaceAll(" $1");

            // Locale.ROOT keeps lowercasing independent of the JVM default locale
            // (e.g. Turkish would map 'I' to dotless 'ı').
            String normalized = NON_WORD_NON_SPACE.matcher(separated)
                    .replaceAll("")
                    .toLowerCase(Locale.ROOT);

            StringBuilder stemmedComment = new StringBuilder();
            // Splitting on whitespace runs (not a single ' ') handles tabs/newlines
            // and the double spaces created by the insertions above.
            for (String word : WHITESPACE_RUN.split(normalized)) {
                // Leading whitespace still produces one empty first token.
                if (word.isEmpty() || Stopwords.isStopWord(word)) {
                    continue;
                }
                if (stemmedComment.length() > 0) {
                    stemmedComment.append(' ');
                }
                stemmedComment.append(porterStemmer.stemString(word));
            }
            filteredComments.add(stemmedComment.toString());
        }
        return filteredComments;
    }

}
