package backend.analysis.machineLearning;

import backend.analysis.LocalDiffMatchPatch;
import backend.analysis.StringDiffer;
import backend.base.gerrit_data.*;
import org.apache.commons.text.similarity.LevenshteinDistance;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;

/**
 * Extracts a {@link FeaturesList} from a {@link Modification}, i.e. from an "old" and a "new"
 * code chunk of which either one may be missing ({@code null}).
 *
 * <p>Numeric features that cannot be computed because a chunk is missing are reported with the
 * sentinel value {@code -1}; the similarity feature additionally uses {@code -2} when both inputs
 * are too large to be compared.
 */
public class FeaturesExtractor {

    /** Sentinel stored in numeric features that cannot be computed for the given modification. */
    private static final int NOT_AVAILABLE = -1;

    /** Sentinel returned by the similarity computation when the inputs are too long to compare. */
    private static final int SIMILARITY_TOO_LARGE = -2;

    /** Inputs whose shorter side exceeds this many characters are not compared. */
    private static final int MAX_SIMILARITY_INPUT_LENGTH = 500000;

    /** Group name used by {@link #computeGroup(String)} for every type without a dedicated group. */
    private static final String FUNCTIONAL_GROUP = "FUNCTIONAL";

    /** Java keywords that are looked for among the inserted/deleted diff fragments. */
    private static final List<String> KEYWORDS = Collections.unmodifiableList(Arrays.asList(
            "private", "public", "protected", "static", "final", "volatile", "this", "void"));

    /**
     * Groups of (stemmed) words searched for in the review comments. The position of a group in
     * this list is the position of its flag in the list returned by
     * {@link #computeCommentsKeywords(Modification)}.
     */
    private static final List<List<String>> COMMENT_KEYWORD_GROUPS = Collections.unmodifiableList(Arrays.asList(
            Arrays.asList("comment", "style", "messag", "string", "log", "error", "read"),
            Arrays.asList("method", "scope", "enum", "tag", "call"),
            Arrays.asList("return"),
            Arrays.asList("chang", "remov", "miss", "order", "delet", "sort"),
            Arrays.asList("final"),
            Arrays.asList("test", "bug"),
            Arrays.asList("add", "implement", "ad")));

    /** Result of the keyword scan: how many keywords were touched and, if exactly one, which. */
    private static final class KeyWordsInfo {

        private final int numberOfKeywords;
        /** The keyword that was found, or {@code null} unless exactly one keyword was found. */
        private final String singleKeyWord;

        private KeyWordsInfo(int numberOfKeywords, String singleKeyWord) {
            this.numberOfKeywords = numberOfKeywords;
            this.singleKeyWord = singleKeyWord;
        }

    }

    /**
     * Computes all features of a modification.
     *
     * @param modification the modification to analyse; its old or new chunk may be {@code null}
     * @param setFlags     when {@code true} the classification fields (category, group, type, ...)
     *                     are derived from the chunks; when {@code false} they are all set to
     *                     {@code "?"}
     * @return a freshly created, populated {@link FeaturesList}
     */
    public FeaturesList computeFeatures(Modification modification, boolean setFlags) {

        CodeChunk oldChunk = modification.getOldCodeChunk();
        CodeChunk newChunk = modification.getNewCodeChunk();

        FeaturesList features = new FeaturesList();

        features.oldLOC = computeLOC(oldChunk);
        features.newLOC = computeLOC(newChunk);

        // computeLOCTypes returns {comments, blank, executable}
        int[] oldLOCTypes = computeLOCTypes(oldChunk);
        features.oldLOCComments = oldLOCTypes[0];
        features.oldLOCBlank = oldLOCTypes[1];
        features.oldLOCExec = oldLOCTypes[2];

        int[] newLOCTypes = computeLOCTypes(newChunk);
        features.newLOCComments = newLOCTypes[0];
        features.newLOCBlank = newLOCTypes[1];
        features.newLOCExec = newLOCTypes[2];

        features.oldFirstChar = computeFirstChar(oldChunk);
        features.oldLastChar = computeLastChar(oldChunk);

        features.newFirstChar = computeFirstChar(newChunk);
        features.newLastChar = computeLastChar(newChunk);

        // Despite the field name, this is a normalised similarity (see computeChangeRelatedSimilarity).
        features.levenshteinDistance = computeLexicalSimilarity(modification);

        // Each diff is computed once and both counters are read from the same result.
        int[] wordsModified = computeNumberOfWordsModified(modification);
        features.numberOfAddedWords = wordsModified[0];
        features.numberOfDeletedWords = wordsModified[1];

        int[] charactersModified = computeNumberOfCharacters(modification);
        features.numberOfAddedCharacters = charactersModified[0];
        features.numberOfDeletedCharacters = charactersModified[1];

        CyclomaticComplexity cc = new CyclomaticComplexity(modification);
        features.oldCyclomaticComplexity = cc.calculateCyclomatic(oldChunk);
        features.newCyclomaticComplexity = cc.calculateCyclomatic(newChunk);

        // All differences are "old minus new".
        features.cyclomaticDiff = features.oldCyclomaticComplexity - features.newCyclomaticComplexity;
        features.LOCExecDiff = features.oldLOCExec - features.newLOCExec;
        features.LOCDiff = features.oldLOC - features.newLOC;
        features.LOCCommentsDiff = features.oldLOCComments - features.newLOCComments;
        features.LOCBlankDiff = features.oldLOCBlank - features.newLOCBlank;

        KeyWordsInfo keyWordsInfo = computeNumberOfKeywords(modification);
        features.numKeywords = keyWordsInfo.numberOfKeywords;
        features.keyword = keyWordsInfo.singleKeyWord != null ? keyWordsInfo.singleKeyWord : "";

        features.commentKeywords = computeCommentsKeywords(modification);

        if (setFlags) {
            setFlags(modification, features);
        } else {
            features.category = "?";
            features.group = "?";
            features.decomposedGroup = "?";
            features.type = "?";
            features.subtype = "?";
        }

        FunctionalFeaturesExtractor ffe = new FunctionalFeaturesExtractor();
        features.ifAdded = ffe.countIfNumber(modification);
        features.countFunctionsDiff = ffe.countFunctionsDiff(modification);
        features.countFunctionsChanged = ffe.countFunctionsChanged(modification);
        features.countFunctionsAdded = ffe.countFunctionAdded(modification);

        features.assignmentDiff = ffe.countAssignmentDiff(modification);

        features.cyclesDiff = ffe.countCyclesDiff(modification);
        features.cyclesNew = ffe.countCyclesNew(modification);

        features.brackets = ffe.isModificationInBrackets(modification);
        features.countIfDiff = ffe.countIFDiff(modification);
        features.countMinusAndPlus = ffe.countArithmeticDiff(modification);

        features.comparisonDiff = ffe.countComparisonDiff(modification);
        features.countNewAdded = ffe.countNewAdded(modification);
        features.bracketsDiff = ffe.countBracketsDiff(modification);

        features.commasDiff = ffe.countCommasDiff(modification);

        TextualFeatures tf = new TextualFeatures();
        features.wordsInComment = tf.numberOfWords(modification);

        // NOTE(review): this instance is never used. It is kept only because the constructor is
        // not visible from here and might have side effects - confirm and remove if it has none.
        POSCalculator posCalculator = new POSCalculator();

        return features;

    }

    /**
     * Fills the classification fields of {@code features} from the chunks of the modification.
     *
     * <p>If only one chunk exists, all fields are copied from it. If both exist, a field is only
     * set while old and new chunk agree: the category must match for anything to be set, the type
     * must match for {@code type} to be set, and the subtype must match for {@code subtype} to be
     * set. Fields that are not set keep whatever value {@link FeaturesList} initialised them with.
     * Group and decomposed group are derived from the new chunk's type whenever the categories
     * match, even if the types differ.
     */
    private void setFlags(Modification modification, FeaturesList features) {

        CodeChunk oldChunk = modification.getOldCodeChunk();
        CodeChunk newChunk = modification.getNewCodeChunk();

        if (newChunk == null) {
            copyFlags(oldChunk, features);
            return;
        }
        if (oldChunk == null) {
            copyFlags(newChunk, features);
            return;
        }

        if (!newChunk.getCategory().equals(oldChunk.getCategory())) {
            return;
        }
        features.category = oldChunk.getCategory().toString();

        String newType = newChunk.getType().toString();
        features.group = computeGroup(newType);
        features.decomposedGroup = computeDecomposedGroup(newType);

        if (newChunk.getType().equals(oldChunk.getType())) {
            features.type = newType;
            if (newChunk.getSubtype().equals(oldChunk.getSubtype())) {
                features.subtype = newChunk.getSubtype().toString();
            }
        }

    }

    /** Copies every classification field from the single available chunk. */
    private void copyFlags(CodeChunk source, FeaturesList features) {
        String type = source.getType().toString();
        features.category = source.getCategory().toString();
        features.group = computeGroup(type);
        features.decomposedGroup = computeDecomposedGroup(type);
        features.type = type;
        features.subtype = source.getSubtype().toString();
    }

    /**
     * @return the number of lines of the chunk, or {@code -1} if the chunk is {@code null}
     */
    private int computeLOC(CodeChunk codeChunk) {
        return codeChunk == null ? NOT_AVAILABLE : codeChunk.getLines().size();
    }

    /**
     * Classifies every line of the chunk as blank, comment or executable.
     *
     * @return a three element array {@code {comments, blank, executable}}; all three are
     *         {@code -1} if the chunk is {@code null}
     */
    private int[] computeLOCTypes(CodeChunk codeChunk) {

        if (codeChunk == null) {
            return new int[] {NOT_AVAILABLE, NOT_AVAILABLE, NOT_AVAILABLE};
        }

        int commentsLOC = 0;
        int blankLOC = 0;
        int execLOC = 0;
        for (DiffLine diffLine : codeChunk.getLines()) {
            String text = diffLine.getDiffLine();
            if (isLineBlankLine(text)) {
                blankLOC++;
            } else if (isLineAComment(text)) {
                commentsLOC++;
            } else {
                execLOC++;
            }
        }
        return new int[] {commentsLOC, blankLOC, execLOC};
    }

    /**
     * @return {@code true} only for the empty string
     */
    private boolean isLineBlankLine(String line) {
        // NOTE(review): whitespace-only lines are not treated as blank (they end up counted as
        // executable). Kept as is because changing it would alter the extracted feature values.
        return line.isEmpty();
    }

    /**
     * Heuristically decides whether a line is (part of) a comment by looking at its first
     * non-whitespace character and the one following it: the line is a comment if it starts with
     * {@code //} or {@code /*}, or if either of the two characters is {@code *}.
     */
    private boolean isLineAComment(String line) {

        if (line.isEmpty()) {
            return false;
        }

        // Skip leading whitespace but never move past the last character.
        int start = 0;
        while (start + 1 < line.length() && Character.isWhitespace(line.charAt(start))) {
            start++;
        }

        char first = line.charAt(start);
        if (start + 1 >= line.length()) {
            // Only one character left. A lone '/' is not a comment; looking at the character
            // after it would read past the end of the string.
            return first == '*';
        }

        char second = line.charAt(start + 1);
        if (first == '/' && (second == '/' || second == '*')) {
            return true;
        }
        // NOTE(review): the second-character rule also matches code such as "a*b".
        // Kept as is because changing it would alter the extracted feature values.
        return first == '*' || second == '*';
    }

    /**
     * @return the first character of the first line, or {@link Character#MIN_VALUE} if the chunk
     *         is {@code null}, has no lines, or its first line is empty
     */
    private char computeFirstChar(CodeChunk codeChunk) {
        if (codeChunk == null || codeChunk.getLines().isEmpty()) {
            return Character.MIN_VALUE;
        }
        String line = codeChunk.getLines().get(0).getDiffLine();
        return line.isEmpty() ? Character.MIN_VALUE : line.charAt(0);
    }

    /**
     * @return the last character of the last line, or {@link Character#MIN_VALUE} if the chunk
     *         is {@code null}, has no lines, or its last line is empty
     */
    private char computeLastChar(CodeChunk codeChunk) {
        if (codeChunk == null || codeChunk.getLines().isEmpty()) {
            return Character.MIN_VALUE;
        }
        List<DiffLine> lines = codeChunk.getLines();
        String line = lines.get(lines.size() - 1).getDiffLine();
        return line.isEmpty() ? Character.MIN_VALUE : line.charAt(line.length() - 1);
    }

    /**
     * Sums the lengths of the inserted and of the deleted fragments of the diff produced by
     * {@code StringDiffer.createCharList} between the old and the new chunk.
     *
     * @return {@code {charactersAdded, charactersDeleted}}, or {@code {-1, -1}} if either chunk
     *         is {@code null}
     */
    private int[] computeNumberOfCharacters(Modification modification) {

        if (modification.getOldCodeChunk() == null || modification.getNewCodeChunk() == null) {
            return new int[] {NOT_AVAILABLE, NOT_AVAILABLE};
        }

        String oldChunk = concatLines(modification.getOldCodeChunk().getLines());
        String newChunk = concatLines(modification.getNewCodeChunk().getLines());
        List<LocalDiffMatchPatch.Diff> diffs = new StringDiffer().createCharList(oldChunk, newChunk);

        int charsAdded = 0;
        int charsDeleted = 0;
        for (LocalDiffMatchPatch.Diff diff : diffs) {
            if (diff.operation.equals(LocalDiffMatchPatch.Operation.INSERT)) {
                charsAdded += diff.text.length();
            } else if (diff.operation.equals(LocalDiffMatchPatch.Operation.DELETE)) {
                charsDeleted += diff.text.length();
            }
        }
        return new int[] {charsAdded, charsDeleted};
    }

    /**
     * Counts the inserted and the deleted fragments of the diff produced by
     * {@code StringDiffer.createDiffList} between the old and the new chunk (presumably one
     * fragment per word - confirm against {@code StringDiffer}).
     *
     * @return {@code {fragmentsAdded, fragmentsDeleted}}, or {@code {-1, -1}} if either chunk
     *         is {@code null}
     */
    private int[] computeNumberOfWordsModified(Modification modification) {

        if (modification.getOldCodeChunk() == null || modification.getNewCodeChunk() == null) {
            return new int[] {NOT_AVAILABLE, NOT_AVAILABLE};
        }

        String oldChunk = concatLines(modification.getOldCodeChunk().getLines());
        String newChunk = concatLines(modification.getNewCodeChunk().getLines());
        List<LocalDiffMatchPatch.Diff> diffs = new StringDiffer().createDiffList(oldChunk, newChunk);

        int wordsAdded = 0;
        int wordsDeleted = 0;
        for (LocalDiffMatchPatch.Diff diff : diffs) {
            if (diff.operation.equals(LocalDiffMatchPatch.Operation.INSERT)) {
                wordsAdded++;
            } else if (diff.operation.equals(LocalDiffMatchPatch.Operation.DELETE)) {
                wordsDeleted++;
            }
        }
        return new int[] {wordsAdded, wordsDeleted};
    }

    /**
     * Counts the inserted or deleted diff fragments whose text is exactly one of
     * {@link #KEYWORDS}. A missing chunk is treated as empty text.
     *
     * @return the count, together with the keyword itself when exactly one was found
     */
    private KeyWordsInfo computeNumberOfKeywords(Modification modification) {

        String oldChunk = "";
        String newChunk = "";
        if (modification.getOldCodeChunk() != null) {
            oldChunk = concatLines(modification.getOldCodeChunk().getLines());
        }
        if (modification.getNewCodeChunk() != null) {
            newChunk = concatLines(modification.getNewCodeChunk().getLines());
        }

        List<LocalDiffMatchPatch.Diff> diffs = new StringDiffer().createDiffList(oldChunk, newChunk);

        int numberOfKeyWords = 0;
        String firstKeyWordFound = null;
        for (LocalDiffMatchPatch.Diff diff : diffs) {
            boolean changed = diff.operation.equals(LocalDiffMatchPatch.Operation.INSERT)
                    || diff.operation.equals(LocalDiffMatchPatch.Operation.DELETE);
            if (changed && KEYWORDS.contains(diff.text)) {
                if (numberOfKeyWords == 0) {
                    firstKeyWordFound = diff.text;
                }
                numberOfKeyWords++;
            }
        }

        // The keyword is only reported when it is unambiguous.
        return new KeyWordsInfo(numberOfKeyWords, numberOfKeyWords == 1 ? firstKeyWordFound : null);
    }

    /**
     * @return the similarity of the old and new chunk text as computed by
     *         {@link #computeChangeRelatedSimilarity(String, String)}, or {@code -1} if either
     *         chunk is {@code null}
     */
    private double computeLexicalSimilarity(Modification modification) {
        if (modification.getOldCodeChunk() == null || modification.getNewCodeChunk() == null) {
            return NOT_AVAILABLE;
        }
        String oldChunk = concatLines(modification.getOldCodeChunk().getLines());
        String newChunk = concatLines(modification.getNewCodeChunk().getLines());
        return computeChangeRelatedSimilarity(oldChunk, newChunk);
    }

    /**
     * Normalised Levenshtein similarity: {@code (len - distance) / len}, where {@code len} is the
     * length of the longer string.
     *
     * @return {@code 1.0} if both strings are empty, {@code -2} if both strings are longer than
     *         500000 characters (no distance is computed then), otherwise the similarity
     */
    private static double computeChangeRelatedSimilarity(String s1, String s2) {

        boolean firstIsLonger = s1.length() >= s2.length();
        String longer = firstIsLonger ? s1 : s2;
        String shorter = firstIsLonger ? s2 : s1;

        int longerLength = longer.length();
        if (longerLength == 0) {
            return 1.0;
        }

        // "Both too long" is equivalent to "the shorter one is too long".
        if (shorter.length() > MAX_SIMILARITY_INPUT_LENGTH) {
            return SIMILARITY_TOO_LARGE;
        }

        int editDistance = new LevenshteinDistance().apply(longer, shorter);
        return (longerLength - editDistance) / (double) longerLength;
    }

    /**
     * Joins the text of all lines into one string.
     */
    private String concatLines(List<DiffLine> lines) {
        // NOTE(review): lines are joined without any separator, so unless the line text carries
        // its own line terminator the last token of a line merges with the first token of the
        // next one. Kept as is because changing it would alter the extracted feature values.
        StringBuilder joined = new StringBuilder();
        for (DiffLine diffLine : lines) {
            joined.append(diffLine.getDiffLine());
        }
        return joined.toString();
    }

    /**
     * Checks, for every group of {@link #COMMENT_KEYWORD_GROUPS}, whether the comments attached
     * to the old chunk contain at least one word of the group (substring match on the
     * normalised, lower-cased comment text).
     *
     * @return one entry per keyword group: {@code 1} if a word of the group occurs, {@code 0} if
     *         not; all entries are {@code -1} if there is no old chunk or it has no comments
     */
    private List<Integer> computeCommentsKeywords(Modification modification) {

        CodeChunk oldChunk = modification.getOldCodeChunk();
        if (oldChunk == null || oldChunk.getComments() == null || oldChunk.getComments().isEmpty()) {
            return new ArrayList<>(Collections.nCopies(COMMENT_KEYWORD_GROUPS.size(), NOT_AVAILABLE));
        }

        StringBuilder concatComments = new StringBuilder();
        for (Comment comment : oldChunk.getComments()) {
            String message = comment.getMessage();
            if (message != null) {
                concatComments.append(message);
            }
        }

        // Put a space in front of underscores, digits and upper-case letters (except at the very
        // start), then drop everything that is neither a word character nor whitespace.
        String normalised = concatComments.toString()
                .replaceAll("(?!^)(_)", " $1")
                .replaceAll("(?!^)([\\d])", " $1")
                .replaceAll("(?!^)([A-Z])", " $1")
                .replaceAll("[^\\w\\s]", "")
                // Locale.ROOT: the default locale must not influence matching (e.g. Turkish
                // lower-cases 'I' to a dotless 'ı', which would hide "final", "miss", ...).
                .toLowerCase(Locale.ROOT);

        List<Integer> wordsContained = new ArrayList<>(COMMENT_KEYWORD_GROUPS.size());
        for (List<String> group : COMMENT_KEYWORD_GROUPS) {
            wordsContained.add(containsAny(normalised, group) ? 1 : 0);
        }
        return wordsContained;
    }

    /** @return {@code true} if {@code text} contains at least one of {@code words} as a substring */
    private static boolean containsAny(String text, List<String> words) {
        for (String word : words) {
            if (text.contains(word)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Maps a type name to its coarse group. Every type without a dedicated group is reported as
     * {@code "FUNCTIONAL"}.
     */
    private String computeGroup(String typeString) {
        switch (typeString) {
            case "UNKNOWN":
                return "UNKNOWN";
            case "TEXTUAL":
            case "SUPPORTEDBYLANGUAGE":
                return "DOCUMENTATION";
            case "VISUALREPRESENTATION":
                return "VISUALREPRESENTATION";
            case "SOLUTIONAPPROACH":
            case "ORGANIZATION":
                return "STRUCTURE";
            default:
                return FUNCTIONAL_GROUP;
        }
    }

    /**
     * Like {@link #computeGroup(String)}, but instead of the single {@code "FUNCTIONAL"} group the
     * known functional types are reported individually.
     *
     * @return the group; the type name itself for the known functional types; {@code null} for
     *         any other type name
     */
    private String computeDecomposedGroup(String typeString) {
        String group = computeGroup(typeString);
        if (!FUNCTIONAL_GROUP.equals(group)) {
            return group;
        }
        switch (typeString) {
            case "INTERFACE":
            case "LOGIC":
            case "RESOURCE":
            case "CHECK":
            case "SUPPORT":
            case "LARGERDEFECTS":
                return typeString;
            default:
                // NOTE(review): unrecognised types yield null here, whereas computeGroup reports
                // them as "FUNCTIONAL". Kept as is; confirm that consumers cope with null.
                return null;
        }
    }

}
