package backend.analysis.machineLearning;

import backend.analysis.textualSimilarities.LDA;
import backend.analysis.textualSimilarities.TFIDFCalculator;
import backend.base.gerrit_data.Comment;
import backend.base.gerrit_data.FeaturesList;
import backend.base.gerrit_data.Modification;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.FastVector;
import weka.core.Instances;

import java.util.ArrayList;
import java.util.List;

/**
 * Class to convert Modification into ClassifierEntry objects
 * and write them to a .arff file.
 */

public class ModificationConverter {

    /**
     * Computes a feature list for every modification and stores it on that modification.
     *
     * <p>The comment corpus is registered via {@code createDocumentsLibrary} before any
     * features are computed, and a single {@link FeaturesExtractor} is reused for all entries.
     *
     * @param modifications modifications to process; each one is updated in place
     * @param setFlags      passed through unchanged to {@code FeaturesExtractor.computeFeatures}
     * @return the same list instance that was passed in
     */
    public List<Modification> extractFeatures(List<Modification> modifications, boolean setFlags){
        // Register the comment corpus first; presumably the extractor reads it - confirm.
        createDocumentsLibrary(modifications);

        final FeaturesExtractor extractor = new FeaturesExtractor();
        for (Modification modification : modifications) {
            modification.setFeatureList(
                    extractor.computeFeatures(modification, setFlags));
        }
        return modifications;
    }


    /**
     * Builds the comment corpus and hands it to the textual-similarity components.
     *
     * <p>For every modification whose old code chunk has at least one comment, the messages of
     * those comments are concatenated (in iteration order, without a separator) into one
     * document. The resulting documents are stored on the shared {@link TFIDFCalculator}
     * instance and passed to {@code LDA.topicCount}.
     *
     * <p>Comments with a {@code null} message are skipped instead of raising a
     * {@link NullPointerException}.
     *
     * @param modifications modifications whose old-chunk comments form the corpus
     */
    private void createDocumentsLibrary(List<Modification> modifications){
        List<String> comments = new ArrayList<>();
        for (Modification m : modifications) {
            // Only modifications with a commented old chunk contribute a document.
            if (m.getOldCodeChunk() == null
                    || m.getOldCodeChunk().getComments() == null
                    || m.getOldCodeChunk().getComments().isEmpty()) {
                continue;
            }

            // StringBuilder avoids re-copying the growing string on every append.
            StringBuilder document = new StringBuilder();
            for (Comment c : m.getOldCodeChunk().getComments()) {
                String message = c.getMessage();
                if (message != null) {
                    document.append(message);
                }
            }
            // NOTE(review): messages are joined without a delimiter, so the last word of one
            // comment runs into the first word of the next - confirm this is what the
            // tokenizers downstream expect.
            comments.add(document.toString());
        }

        TFIDFCalculator tfidfCalculator = TFIDFCalculator.getInstance();
        tfidfCalculator.setCommentsList(comments);

        // The full LDA computation (lda.computeLDA) is currently disabled; only topicCount runs.
        LDA lda = new LDA();
        lda.topicCount(comments);
    }


    /**
     * Builds the Weka data set for category-level classification
     * (EVOLVABILITY vs. FUNCTIONAL).
     *
     * <p>Modifications whose category is {@code null} or {@code "UNKNOWN"} are skipped. Every
     * other modification becomes one dense instance (weight 1.0) with 39 numeric feature
     * columns, 7 comment-keyword columns and the nominal {@code "category"} class column,
     * which is always the last attribute. If a modification carries fewer than 7 comment
     * keywords, the remaining keyword columns stay 0.0.
     *
     * <p>The number of instances and the per-category counts are printed to standard output.
     *
     * @param classifierEntries modifications to convert; each must have a non-null feature list
     * @return the populated data set with relation name {@code "MyRelation"}; the class index
     *         is not set by this method
     * @throws ArrayIndexOutOfBoundsException if a modification carries more comment keywords
     *         than there are keyword columns
     */
    public Instances createCategoryInstances(List<Modification> classifierEntries){
        // Numeric feature columns, in exactly the order the values are written below.
        // The names are runtime identifiers of the data set and are kept verbatim.
        // NOTE(review): "oneIFDel" is filled from countFunctionsDiff and "New diff" from
        // comparisonDiff; the labels look stale - confirm before renaming.
        final String[] featureNames = {
                "oldLoc", "oldLocComments", "oldLocExec", "oldLocBlank",
                "oldFirstChar", "oldLastChar", "oldCyclomaticComplexity",

                "newLoc", "newLocComments", "newLocExec", "newLocBlank",
                "newFirstChar", "newLastChar", "newCyclomaticComplexity",

                "numberOfAddedWords", "numberOfDeletedWords",
                "numberOfAddedCharacters", "numberOfDeletedCharacters",
                "levenshteinDistance", "numKeywords", "keyword",

                "oneIf", "brackets", "commentsWords",

                "cyclomaticDiff", "LOCExecDiff", "LOCDiff", "LOCCommentsDiff",

                "oneIFDel", "Functions changed", "Functions added",
                "Cycles diff", "assignment diff",

                "if diff", "min and p diff",

                "New diff", "New added", "Brackets diff",

                "Comma diff"
        };
        // One column per entry of FeaturesList.commentKeywords.
        final String[] keywordNames = {
                "commentWord", "methodWord", "returnWord", "changWord",
                "finalWord", "testWord", "addWord"
        };

        List<String> classVal = new ArrayList<>();
        classVal.add("EVOLVABILITY");
        classVal.add("FUNCTIONAL");

        ArrayList<Attribute> atts = new ArrayList<>();
        for (String name : featureNames) {
            atts.add(new Attribute(name));
        }
        for (String name : keywordNames) {
            atts.add(new Attribute(name));
        }
        atts.add(new Attribute("category", classVal));

        Instances data = new Instances("MyRelation", atts, 0);

        final int firstKeywordColumn = featureNames.length;
        // The class value always goes into the last column, independent of how many
        // keywords a modification happens to carry.
        final int classColumn = data.numAttributes() - 1;

        int evolCount = 0;
        int funCount = 0;

        for (Modification ce : classifierEntries) {
            FeaturesList featuresList = ce.getFeatures();
            if (featuresList.category == null || featuresList.category.equals("UNKNOWN")) {
                continue;
            }

            double[] vals = new double[data.numAttributes()];
            int col = 0;

            vals[col++] = featuresList.oldLOC;
            vals[col++] = featuresList.oldLOCComments;
            vals[col++] = featuresList.oldLOCExec;
            vals[col++] = featuresList.oldLOCBlank;
            vals[col++] = featuresList.oldFirstChar;
            vals[col++] = featuresList.oldLastChar;
            vals[col++] = featuresList.oldCyclomaticComplexity;

            vals[col++] = featuresList.newLOC;
            vals[col++] = featuresList.newLOCComments;
            vals[col++] = featuresList.newLOCExec;
            vals[col++] = featuresList.newLOCBlank;
            vals[col++] = featuresList.newFirstChar;
            vals[col++] = featuresList.newLastChar;
            vals[col++] = featuresList.newCyclomaticComplexity;

            vals[col++] = featuresList.numberOfAddedWords;
            vals[col++] = featuresList.numberOfDeletedWords;
            vals[col++] = featuresList.numberOfAddedCharacters;
            vals[col++] = featuresList.numberOfDeletedCharacters;
            vals[col++] = featuresList.levenshteinDistance;
            vals[col++] = featuresList.numKeywords;
            vals[col++] = convertKeyWordToInt(featuresList.keyword);

            vals[col++] = featuresList.ifAdded;
            vals[col++] = featuresList.brackets;
            vals[col++] = featuresList.wordsInComment;

            vals[col++] = featuresList.cyclomaticDiff;
            vals[col++] = featuresList.LOCExecDiff;
            vals[col++] = featuresList.LOCDiff;
            vals[col++] = featuresList.LOCCommentsDiff;

            vals[col++] = featuresList.countFunctionsDiff;
            vals[col++] = featuresList.countFunctionsChanged;
            vals[col++] = featuresList.countFunctionsAdded;

            vals[col++] = featuresList.cyclesDiff;
            vals[col++] = featuresList.assignmentDiff;

            vals[col++] = featuresList.countIfDiff;
            vals[col++] = featuresList.countMinusAndPlus;

            vals[col++] = featuresList.comparisonDiff;
            vals[col++] = featuresList.countNewAdded;
            vals[col++] = featuresList.bracketsDiff;

            vals[col++] = featuresList.commasDiff;

            int keywordCount = featuresList.commentKeywords.size();
            if (keywordCount > keywordNames.length) {
                // Writing them would spill into (or past) the class column.
                throw new ArrayIndexOutOfBoundsException(
                        "commentKeywords has " + keywordCount + " entries but only "
                                + keywordNames.length + " keyword attributes exist");
            }
            for (int k = 0; k < keywordCount; k++) {
                vals[firstKeywordColumn + k] = featuresList.commentKeywords.get(k);
            }

            vals[classColumn] = convertCategoryToInt(featuresList.category);

            if (featuresList.category.equals("EVOLVABILITY")) {
                evolCount++;
            }
            if (featuresList.category.equals("FUNCTIONAL")) {
                funCount++;
            }

            data.add(new DenseInstance(1.0, vals));
        }

        System.out.println(data.size());

        System.out.println("=========");
        System.out.println("Evolvability:  " + evolCount);
        System.out.println("Functional:   " + funCount);

        return data;
    }


    /**
     * Builds the category-level data set (EVOLVABILITY vs. FUNCTIONAL) for the cross-project
     * methods, leaving out the features that feature selection marked as not relevant.
     *
     * <p>Compared with {@code createCategoryInstances}, the attributes "brackets",
     * "Functions changed", "assignment diff", "Brackets diff", "methodWord" and "changWord"
     * are dropped. Only the comment keywords at positions 0, 2, 4, 5 and 6 are used.
     *
     * <p>Modifications whose category is {@code null} or {@code "UNKNOWN"} are skipped. The
     * number of instances and the per-category counts are printed to standard output.
     *
     * @param classifierEntries modifications to convert; each must have a non-null feature list
     * @return the populated data set with relation name {@code "MyRelation"}
     */
    public Instances createCategoryInstancesLeaveOneOut(List<Modification> classifierEntries){
        // Numeric columns in the order the row values are listed further down.
        final String[] numericColumns = {
                "oldLoc", "oldLocComments", "oldLocExec", "oldLocBlank",
                "oldFirstChar", "oldLastChar", "oldCyclomaticComplexity",

                "newLoc", "newLocComments", "newLocExec", "newLocBlank",
                "newFirstChar", "newLastChar", "newCyclomaticComplexity",

                "numberOfAddedWords", "numberOfDeletedWords",
                "numberOfAddedCharacters", "numberOfDeletedCharacters",
                "levenshteinDistance", "numKeywords", "keyword",

                "oneIf", "commentsWords",

                "cyclomaticDiff", "LOCExecDiff", "LOCDiff", "LOCCommentsDiff",

                "oneIFDel", "Functions added", "Cycles diff",

                "if diff", "min and p diff",

                "New diff", "New added",

                "Comma diff",

                "commentWord", "returnWord", "finalWord", "testWord", "addWord"
        };

        List<String> categories = new ArrayList<>();
        categories.add("EVOLVABILITY");
        categories.add("FUNCTIONAL");

        ArrayList<Attribute> schema = new ArrayList<>();
        for (String column : numericColumns) {
            schema.add(new Attribute(column));
        }
        schema.add(new Attribute("category", categories));

        Instances dataset = new Instances("MyRelation", schema, 0);

        int evolvabilityTotal = 0;
        int functionalTotal = 0;

        for (Modification entry : classifierEntries) {
            FeaturesList f = entry.getFeatures();
            if (f.category == null || f.category.equals("UNKNOWN")) {
                continue;
            }

            // One value per attribute of the schema, class value last.
            double[] row = {
                    f.oldLOC,
                    f.oldLOCComments,
                    f.oldLOCExec,
                    f.oldLOCBlank,
                    f.oldFirstChar,
                    f.oldLastChar,
                    f.oldCyclomaticComplexity,

                    f.newLOC,
                    f.newLOCComments,
                    f.newLOCExec,
                    f.newLOCBlank,
                    f.newFirstChar,
                    f.newLastChar,
                    f.newCyclomaticComplexity,

                    f.numberOfAddedWords,
                    f.numberOfDeletedWords,
                    f.numberOfAddedCharacters,
                    f.numberOfDeletedCharacters,
                    f.levenshteinDistance,
                    f.numKeywords,
                    convertKeyWordToInt(f.keyword),

                    f.ifAdded,
                    f.wordsInComment,

                    f.cyclomaticDiff,
                    f.LOCExecDiff,
                    f.LOCDiff,
                    f.LOCCommentsDiff,

                    f.countFunctionsDiff,
                    f.countFunctionsAdded,

                    f.cyclesDiff,

                    f.countIfDiff,
                    f.countMinusAndPlus,

                    f.comparisonDiff,
                    f.countNewAdded,

                    f.commasDiff,

                    // Keyword positions 1 and 3 are left out on purpose.
                    f.commentKeywords.get(0),
                    f.commentKeywords.get(2),
                    f.commentKeywords.get(4),
                    f.commentKeywords.get(5),
                    f.commentKeywords.get(6),

                    convertCategoryToInt(f.category)
            };

            if (f.category.equals("EVOLVABILITY")) {
                evolvabilityTotal++;
            }
            if (f.category.equals("FUNCTIONAL")) {
                functionalTotal++;
            }

            dataset.add(new DenseInstance(1.0, row));
        }

        System.out.println(dataset.size());

        System.out.println("=========");
        System.out.println("Evolvability:  " + evolvabilityTotal);
        System.out.println("Functional:   " + functionalTotal);

        return dataset;
    }


    /**
     * Builds the Weka data set for group-level classification (DOCUMENTATION,
     * VISUALREPRESENTATION, STRUCTURE, FUNCTIONAL).
     *
     * <p>Modifications whose group is {@code null} or {@code "UNKNOWN"} are skipped. Every
     * other modification becomes one dense instance (weight 1.0) with 39 numeric feature
     * columns, 7 comment-keyword columns and the nominal class column (named
     * {@code "category"}), which is always the last attribute. If a modification carries fewer
     * than 7 comment keywords, the remaining keyword columns stay 0.0.
     *
     * <p>The number of instances is printed to standard output.
     *
     * @param classifierEntries modifications to convert; each must have a non-null feature list
     * @return the populated data set with relation name {@code "MyRelation"}; the class index
     *         is not set by this method
     * @throws ArrayIndexOutOfBoundsException if a modification carries more comment keywords
     *         than there are keyword columns
     */
    public Instances createGroupInstances(List<Modification> classifierEntries){
        // Numeric feature columns, in exactly the order the values are written below.
        // The names are runtime identifiers of the data set and are kept verbatim.
        // NOTE(review): "New diff" is filled from comparisonDiff; the label looks stale -
        // confirm before renaming.
        final String[] featureNames = {
                "oldLoc", "oldLocComments", "oldLocExec", "oldLocBlank",
                "oldFirstChar", "oldLastChar", "oldCyclomaticComplexity",

                "newLoc", "newLocComments", "newLocExec", "newLocBlank",
                "newFirstChar", "newLastChar", "newCyclomaticComplexity",

                "numberOfAddedWords", "numberOfDeletedWords",
                "numberOfAddedCharacters", "numberOfDeletedCharacters",
                "levenshteinDistance", "numKeywords", "keyword",

                "if added", "brackets", "commentsWords",

                "cyclomaticDiff", "LOCExecDiff", "LOCDiff", "LOCCommentsDiff",

                "Functions diff", "Functions changed", "Functions added",
                "Cycles diff", "assignment diff",

                "if diff", "min and p diff",

                "New diff", "New added", "Brackets diff",

                "Comma diff"
        };
        // One column per entry of FeaturesList.commentKeywords.
        final String[] keywordNames = {
                "commentWord", "methodWord", "returnWord", "changWord",
                "finalWord", "testWord", "addWord"
        };

        List<String> classVal = new ArrayList<>();
        classVal.add("DOCUMENTATION");
        classVal.add("VISUALREPRESENTATION");
        classVal.add("STRUCTURE");
        classVal.add("FUNCTIONAL");

        ArrayList<Attribute> atts = new ArrayList<>();
        for (String name : featureNames) {
            atts.add(new Attribute(name));
        }
        for (String name : keywordNames) {
            atts.add(new Attribute(name));
        }
        atts.add(new Attribute("category", classVal));

        // Create Instances object
        Instances data = new Instances("MyRelation", atts, 0);

        final int firstKeywordColumn = featureNames.length;
        // The class value always goes into the last column, independent of how many
        // keywords a modification happens to carry.
        final int classColumn = data.numAttributes() - 1;

        // Fill with data
        for (Modification ce : classifierEntries) {
            FeaturesList featuresList = ce.getFeatures();
            if (featuresList.group == null || featuresList.group.equals("UNKNOWN")) {
                continue;
            }

            double[] vals = new double[data.numAttributes()];
            int col = 0;

            vals[col++] = featuresList.oldLOC;
            vals[col++] = featuresList.oldLOCComments;
            vals[col++] = featuresList.oldLOCExec;
            vals[col++] = featuresList.oldLOCBlank;
            vals[col++] = featuresList.oldFirstChar;
            vals[col++] = featuresList.oldLastChar;
            vals[col++] = featuresList.oldCyclomaticComplexity;

            vals[col++] = featuresList.newLOC;
            vals[col++] = featuresList.newLOCComments;
            vals[col++] = featuresList.newLOCExec;
            vals[col++] = featuresList.newLOCBlank;
            vals[col++] = featuresList.newFirstChar;
            vals[col++] = featuresList.newLastChar;
            vals[col++] = featuresList.newCyclomaticComplexity;

            vals[col++] = featuresList.numberOfAddedWords;
            vals[col++] = featuresList.numberOfDeletedWords;
            vals[col++] = featuresList.numberOfAddedCharacters;
            vals[col++] = featuresList.numberOfDeletedCharacters;
            vals[col++] = featuresList.levenshteinDistance;
            vals[col++] = featuresList.numKeywords;
            vals[col++] = convertKeyWordToInt(featuresList.keyword);

            vals[col++] = featuresList.ifAdded;
            vals[col++] = featuresList.brackets;
            vals[col++] = featuresList.wordsInComment;

            vals[col++] = featuresList.cyclomaticDiff;
            vals[col++] = featuresList.LOCExecDiff;
            vals[col++] = featuresList.LOCDiff;
            vals[col++] = featuresList.LOCCommentsDiff;

            vals[col++] = featuresList.countFunctionsDiff;
            vals[col++] = featuresList.countFunctionsChanged;
            vals[col++] = featuresList.countFunctionsAdded;

            vals[col++] = featuresList.cyclesDiff;
            vals[col++] = featuresList.assignmentDiff;

            vals[col++] = featuresList.countIfDiff;
            vals[col++] = featuresList.countMinusAndPlus;

            vals[col++] = featuresList.comparisonDiff;
            vals[col++] = featuresList.countNewAdded;
            vals[col++] = featuresList.bracketsDiff;

            vals[col++] = featuresList.commasDiff;

            int keywordCount = featuresList.commentKeywords.size();
            if (keywordCount > keywordNames.length) {
                // Writing them would spill into (or past) the class column.
                throw new ArrayIndexOutOfBoundsException(
                        "commentKeywords has " + keywordCount + " entries but only "
                                + keywordNames.length + " keyword attributes exist");
            }
            for (int k = 0; k < keywordCount; k++) {
                vals[firstKeywordColumn + k] = featuresList.commentKeywords.get(k);
            }

            vals[classColumn] = convertGroupToInt(featuresList.group);

            data.add(new DenseInstance(1.0, vals));
        }

        System.out.println(data.size());

        return data;
    }


    /**
     * Builds the group-level data set (DOCUMENTATION, VISUALREPRESENTATION, STRUCTURE,
     * FUNCTIONAL) in its leave-one-out variant.
     *
     * <p>The layout matches {@code createGroupInstances} except that the "brackets" attribute
     * is left out: 38 numeric feature columns, 7 comment-keyword columns and the nominal class
     * column (named {@code "category"}), which is always the last attribute. If a modification
     * carries fewer than 7 comment keywords, the remaining keyword columns stay 0.0.
     *
     * <p>Modifications whose group is {@code null} or {@code "UNKNOWN"} are skipped. The number
     * of instances is printed to standard output.
     *
     * @param classifierEntries modifications to convert; each must have a non-null feature list
     * @return the populated data set with relation name {@code "MyRelation"}; the class index
     *         is not set by this method
     * @throws ArrayIndexOutOfBoundsException if a modification carries more comment keywords
     *         than there are keyword columns
     */
    public Instances createGroupInstancesLeaveOneOut(List<Modification> classifierEntries){
        // Numeric feature columns, in exactly the order the values are written below.
        // The names are runtime identifiers of the data set and are kept verbatim.
        // NOTE(review): "New diff" is filled from comparisonDiff; the label looks stale -
        // confirm before renaming.
        final String[] featureNames = {
                "oldLoc", "oldLocComments", "oldLocExec", "oldLocBlank",
                "oldFirstChar", "oldLastChar", "oldCyclomaticComplexity",

                "newLoc", "newLocComments", "newLocExec", "newLocBlank",
                "newFirstChar", "newLastChar", "newCyclomaticComplexity",

                "numberOfAddedWords", "numberOfDeletedWords",
                "numberOfAddedCharacters", "numberOfDeletedCharacters",
                "levenshteinDistance", "numKeywords", "keyword",

                "if added", "commentsWords",

                "cyclomaticDiff", "LOCExecDiff", "LOCDiff", "LOCCommentsDiff",

                "Functions diff", "Functions changed", "Functions added",
                "Cycles diff", "assignment diff",

                "if diff", "min and p diff",

                "New diff", "New added", "Brackets diff",

                "Comma diff"
        };
        // One column per entry of FeaturesList.commentKeywords.
        final String[] keywordNames = {
                "commentWord", "methodWord", "returnWord", "changWord",
                "finalWord", "testWord", "addWord"
        };

        List<String> classVal = new ArrayList<>();
        classVal.add("DOCUMENTATION");
        classVal.add("VISUALREPRESENTATION");
        classVal.add("STRUCTURE");
        classVal.add("FUNCTIONAL");

        ArrayList<Attribute> atts = new ArrayList<>();
        for (String name : featureNames) {
            atts.add(new Attribute(name));
        }
        for (String name : keywordNames) {
            atts.add(new Attribute(name));
        }
        atts.add(new Attribute("category", classVal));

        // Create Instances object
        Instances data = new Instances("MyRelation", atts, 0);

        final int firstKeywordColumn = featureNames.length;
        // The class value always goes into the last column, independent of how many
        // keywords a modification happens to carry.
        final int classColumn = data.numAttributes() - 1;

        // Fill with data
        for (Modification ce : classifierEntries) {
            FeaturesList featuresList = ce.getFeatures();
            if (featuresList.group == null || featuresList.group.equals("UNKNOWN")) {
                continue;
            }

            double[] vals = new double[data.numAttributes()];
            int col = 0;

            vals[col++] = featuresList.oldLOC;
            vals[col++] = featuresList.oldLOCComments;
            vals[col++] = featuresList.oldLOCExec;
            vals[col++] = featuresList.oldLOCBlank;
            vals[col++] = featuresList.oldFirstChar;
            vals[col++] = featuresList.oldLastChar;
            vals[col++] = featuresList.oldCyclomaticComplexity;

            vals[col++] = featuresList.newLOC;
            vals[col++] = featuresList.newLOCComments;
            vals[col++] = featuresList.newLOCExec;
            vals[col++] = featuresList.newLOCBlank;
            vals[col++] = featuresList.newFirstChar;
            vals[col++] = featuresList.newLastChar;
            vals[col++] = featuresList.newCyclomaticComplexity;

            vals[col++] = featuresList.numberOfAddedWords;
            vals[col++] = featuresList.numberOfDeletedWords;
            vals[col++] = featuresList.numberOfAddedCharacters;
            vals[col++] = featuresList.numberOfDeletedCharacters;
            vals[col++] = featuresList.levenshteinDistance;
            vals[col++] = featuresList.numKeywords;
            vals[col++] = convertKeyWordToInt(featuresList.keyword);

            vals[col++] = featuresList.ifAdded;
            // featuresList.brackets is left out of this variant.
            vals[col++] = featuresList.wordsInComment;

            vals[col++] = featuresList.cyclomaticDiff;
            vals[col++] = featuresList.LOCExecDiff;
            vals[col++] = featuresList.LOCDiff;
            vals[col++] = featuresList.LOCCommentsDiff;

            vals[col++] = featuresList.countFunctionsDiff;
            vals[col++] = featuresList.countFunctionsChanged;
            vals[col++] = featuresList.countFunctionsAdded;

            vals[col++] = featuresList.cyclesDiff;
            vals[col++] = featuresList.assignmentDiff;

            vals[col++] = featuresList.countIfDiff;
            vals[col++] = featuresList.countMinusAndPlus;

            vals[col++] = featuresList.comparisonDiff;
            vals[col++] = featuresList.countNewAdded;
            vals[col++] = featuresList.bracketsDiff;

            vals[col++] = featuresList.commasDiff;

            int keywordCount = featuresList.commentKeywords.size();
            if (keywordCount > keywordNames.length) {
                // Writing them would spill into (or past) the class column.
                throw new ArrayIndexOutOfBoundsException(
                        "commentKeywords has " + keywordCount + " entries but only "
                                + keywordNames.length + " keyword attributes exist");
            }
            for (int k = 0; k < keywordCount; k++) {
                vals[firstKeywordColumn + k] = featuresList.commentKeywords.get(k);
            }

            vals[classColumn] = convertGroupToInt(featuresList.group);

            data.add(new DenseInstance(1.0, vals));
        }

        System.out.println(data.size());

        return data;
    }


    public Instances createStructVsFunctInstances(List<Modification> classifierEntries){
        FastVector atts;
        FastVector attsRel;
        FastVector attVals;
        FastVector attValsRel;
        Instances data;
        Instances dataRel;
        double[] vals;
        double[] valsRel;

        ArrayList classVal;

        // Set up attributes
        atts = new FastVector();

        classVal = new ArrayList();

        classVal.add("STRUCTURE");
        classVal.add("FUNCTIONAL");


        atts.addElement(new Attribute("oldLoc"));
        atts.addElement(new Attribute("oldLocComments"));
        atts.addElement(new Attribute("oldLocExec"));
        atts.addElement(new Attribute("oldLocBlank"));
        atts.addElement(new Attribute("oldFirstChar"));
        atts.addElement(new Attribute("oldLastChar"));
        atts.addElement(new Attribute("oldCyclomaticComplexity"));

        atts.addElement(new Attribute("newLoc"));
        atts.addElement(new Attribute("newLocComments"));
        atts.addElement(new Attribute("newLocExec"));
        atts.addElement(new Attribute("newLocBlank"));
        atts.addElement(new Attribute("newFirstChar"));
        atts.addElement(new Attribute("newLastChar"));
        atts.addElement(new Attribute("newCyclomaticComplexity"));

        atts.addElement(new Attribute("numberOfAddedWords"));
        atts.addElement(new Attribute("numberOfDeletedWords"));
        atts.addElement(new Attribute("numberOfAddedCharacters"));
        atts.addElement(new Attribute("numberOfDeletedCharacters"));
        atts.addElement(new Attribute("levenshteinDistance"));
        atts.addElement(new Attribute("numKeywords"));
        atts.addElement(new Attribute("keyword"));

        atts.addElement(new Attribute("if added"));
        atts.addElement(new Attribute("brackets"));
        atts.addElement(new Attribute("commentsWords"));

        atts.addElement(new Attribute("cyclomaticDiff"));
        atts.addElement(new Attribute("LOCExecDiff"));
        atts.addElement(new Attribute("LOCDiff"));
        atts.addElement(new Attribute("LOCCommentsDiff"));

        atts.addElement(new Attribute("Functions diff"));
        atts.addElement(new Attribute("Functions changed"));
        atts.addElement(new Attribute("Functions added"));
        atts.addElement(new Attribute("Cycles diff"));
        atts.addElement(new Attribute("assignment diff"));

        atts.addElement(new Attribute("if diff"));
        atts.addElement(new Attribute("min and p diff"));

        atts.addElement(new Attribute("New diff"));
        atts.addElement(new Attribute("New added"));
        atts.addElement(new Attribute("Brackets diff"));

        atts.addElement(new Attribute("Comma diff"));



        atts.addElement(new Attribute("commentWord"));
        atts.addElement(new Attribute("methodWord"));
        atts.addElement(new Attribute("returnWord"));
        atts.addElement(new Attribute("changWord"));
        atts.addElement(new Attribute("finalWord"));
        atts.addElement(new Attribute("testWord"));
        atts.addElement(new Attribute("addWord"));

        atts.addElement(new Attribute("category", classVal));

        attsRel = new FastVector();
        attVals = new FastVector();


        // Create Instances object
        data = new Instances("MyRelation", atts, 0);

        // Fill with data
        for(Modification ce : classifierEntries) {

            if (ce.getFeatures().group != null && !ce.getFeatures().group.equals("UNKNOWN")) {

                vals = new double[data.numAttributes()];
                // - numeric

                FeaturesList featuresList = ce.getFeatures();

                vals[0] = featuresList.oldLOC;
                vals[1] = featuresList.oldLOCComments;
                vals[2] = featuresList.oldLOCExec;
                vals[3] = featuresList.oldLOCBlank;
                vals[4] = featuresList.oldFirstChar;
                vals[5] = featuresList.oldLastChar;
                vals[6] = featuresList.oldCyclomaticComplexity;

                vals[7] = featuresList.newLOC;
                vals[8] = featuresList.newLOCComments;
                vals[9] = featuresList.newLOCExec;
                vals[10] = featuresList.newLOCBlank;
                vals[11] = featuresList.newFirstChar;
                vals[12] = featuresList.newLastChar;
                vals[13] = featuresList.newCyclomaticComplexity;

                vals[14] = featuresList.numberOfAddedWords;
                vals[15] = featuresList.numberOfDeletedWords;
                vals[16] = featuresList.numberOfAddedCharacters;
                vals[17] = featuresList.numberOfDeletedCharacters;
                vals[18] = featuresList.levenshteinDistance;
                vals[19] = featuresList.numKeywords;
                vals[20] = convertKeyWordToInt(featuresList.keyword);

                vals[21] = featuresList.ifAdded;
                vals[22] = featuresList.brackets;
                vals[23] = featuresList.wordsInComment;


                vals[24] = featuresList.cyclomaticDiff;
                vals[25] = featuresList.LOCExecDiff;
                vals[26] = featuresList.LOCDiff;
                vals[27] = featuresList.LOCCommentsDiff;

                vals[28] = featuresList.countFunctionsDiff;
                vals[29] = featuresList.countFunctionsChanged;
                vals[30] = featuresList.countFunctionsAdded;

                vals[31] = featuresList.cyclesDiff;
                vals[32] = featuresList.assignmentDiff;

                vals[33] = featuresList.countIfDiff;
                vals[34] = featuresList.countMinusAndPlus;

                vals[35] = featuresList.comparisonDiff;
                vals[36] = featuresList.countNewAdded;
                vals[37] = featuresList.bracketsDiff;

                vals[38] = featuresList.commasDiff;

                for(int i=1; i<=ce.getFeatures().commentKeywords.size(); i++){
                    vals[38+i] = ce.getFeatures().commentKeywords.get(i-1);
                }

                int sizeWords = ce.getFeatures().commentKeywords.size();
                vals[39+sizeWords] = convertStructAndFuncToInt(featuresList.group);
                if(vals[39+sizeWords] == 0 || vals[39+sizeWords] == 1) {
                    data.add(new DenseInstance(1.0, vals));
                }
            }
        }

        System.out.println(data.size());

        return data;
    }


    /**
     * Maps a category label to its numeric code.
     *
     * @param category the category label; a null value causes a NullPointerException
     * @return 0 for "EVOLVABILITY", 1 for "FUNCTIONAL", -1 for any other label
     */
    private int convertCategoryToInt(String category){
        switch (category) {
            case "EVOLVABILITY":
                return 0;
            case "FUNCTIONAL":
                return 1;
            default:
                return -1;
        }
    }


    /**
     * Maps a group label to its numeric code.
     *
     * @param group the group label; a null value causes a NullPointerException
     * @return 0 for "DOCUMENTATION", 1 for "VISUALREPRESENTATION", 2 for "STRUCTURE",
     *         3 for "FUNCTIONAL", -1 for any other label
     */
    private int convertGroupToInt(String group){
        // The position of a label in this table is the code that is returned for it.
        final String[] knownGroups = {"DOCUMENTATION", "VISUALREPRESENTATION", "STRUCTURE", "FUNCTIONAL"};
        for (int code = 0; code < knownGroups.length; code++) {
            if (group.equals(knownGroups[code])) {
                return code;
            }
        }
        return -1;
    }


    /**
     * Maps a group label to the binary STRUCTURE / FUNCTIONAL class code.
     *
     * @param group the group label; a null value causes a NullPointerException
     * @return 0 for "STRUCTURE", 1 for "FUNCTIONAL", -1 for any other label
     */
    private int convertStructAndFuncToInt(String group){
        return group.equals("STRUCTURE")
                ? 0
                : (group.equals("FUNCTIONAL") ? 1 : -1);
    }


    /**
     * Builds the Weka data set whose class attribute is the "decomposedGroup" of a modification.
     * <p>
     * The relation has 46 feature attributes followed by the nominal class attribute.
     * Entries whose decomposed group is null or "UNKNOWN" are skipped. The number of rows
     * is printed to standard output before returning. The class index of the returned
     * data set is not set here.
     * <p>
     * Every declared attribute now receives its own value: the seven difference features
     * at columns 32-38 are filled, the keyword counts start at column 39, and the class
     * value is written to the last column. Previously the keyword counts were written
     * into columns 32-38 and the class value into a keyword column, so the class column
     * always stayed 0.
     *
     * @param classifierEntries modifications to convert; each must return a non-null
     *                          feature list from getFeatures()
     * @return the populated data set
     */
    public Instances createDecomposedGroupInstances(List<Modification> classifierEntries){
        // Nominal values of the class attribute.
        final String[] groupLabels = {
                "DOCUMENTATION", "VISUALREPRESENTATION", "STRUCTURE", "INTERFACE", "LOGIC",
                "RESOURCE", "CHECK", "SUPPORT", "LARGERDEFECTS"
        };
        ArrayList<String> classVal = new ArrayList<>();
        for (String label : groupLabels) {
            classVal.add(label);
        }

        // Feature attributes, in column order (columns 0-45).
        // NOTE(review): column 28 is named "oneIFDel" but is filled with countFunctionsDiff, and
        // column 35 is named "New" but is filled with comparisonDiff; the names are kept as they were.
        final String[] featureNames = {
                "oldLoc", "oldLocComments", "oldLocExec", "oldLocBlank",
                "oldFirstChar", "oldLastChar", "oldCyclomaticComplexity",
                "newLoc", "newLocComments", "newLocExec", "newLocBlank",
                "newFirstChar", "newLastChar", "newCyclomaticComplexity",
                "numberOfAddedWords", "numberOfDeletedWords", "numberOfAddedCharacters",
                "numberOfDeletedCharacters", "levenshteinDistance", "numKeywords", "keyword",
                "oneIf", "brackets", "comments words",
                "cyclomaticDiff", "LOCExecDiff", "LOCDiff", "LOCCommentsDiff",
                "oneIFDel", "Functions changed", "Functions added", "Cycles diff", "assignment diff",
                "if diff", "min and p diff",
                "New", "New added", "Brackets diff",
                "Comma diff",
                "commentWord", "methodWord", "returnWord", "changWord",
                "finalWord", "testWord", "addWord"
        };

        FastVector atts = new FastVector();
        for (String featureName : featureNames) {
            atts.addElement(new Attribute(featureName));
        }
        atts.addElement(new Attribute("decomposedGroup", classVal));

        // Create Instances object
        Instances data = new Instances("MyRelation", atts, 0);

        // Fill with data
        for(Modification ce : classifierEntries) {

            FeaturesList featuresList = ce.getFeatures();
            if (featuresList.decomposedGroup == null || featuresList.decomposedGroup.equals("UNKNOWN")) {
                continue;
            }

            double[] vals = new double[data.numAttributes()];

            vals[0] = featuresList.oldLOC;
            vals[1] = featuresList.oldLOCComments;
            vals[2] = featuresList.oldLOCExec;
            vals[3] = featuresList.oldLOCBlank;
            vals[4] = featuresList.oldFirstChar;
            vals[5] = featuresList.oldLastChar;
            vals[6] = featuresList.oldCyclomaticComplexity;

            vals[7] = featuresList.newLOC;
            vals[8] = featuresList.newLOCComments;
            vals[9] = featuresList.newLOCExec;
            vals[10] = featuresList.newLOCBlank;
            vals[11] = featuresList.newFirstChar;
            vals[12] = featuresList.newLastChar;
            vals[13] = featuresList.newCyclomaticComplexity;

            vals[14] = featuresList.numberOfAddedWords;
            vals[15] = featuresList.numberOfDeletedWords;
            vals[16] = featuresList.numberOfAddedCharacters;
            vals[17] = featuresList.numberOfDeletedCharacters;
            vals[18] = featuresList.levenshteinDistance;
            vals[19] = featuresList.numKeywords;
            vals[20] = convertKeyWordToInt(featuresList.keyword);

            vals[21] = featuresList.ifAdded;
            vals[22] = featuresList.brackets;
            vals[23] = featuresList.wordsInComment;

            vals[24] = featuresList.cyclomaticDiff;
            vals[25] = featuresList.LOCExecDiff;
            vals[26] = featuresList.LOCDiff;
            vals[27] = featuresList.LOCCommentsDiff;

            vals[28] = featuresList.countFunctionsDiff;
            vals[29] = featuresList.countFunctionsChanged;
            vals[30] = featuresList.countFunctionsAdded;

            vals[31] = featuresList.cyclesDiff;

            // Columns 32-38 are declared above but were never filled before.
            vals[32] = featuresList.assignmentDiff;

            vals[33] = featuresList.countIfDiff;
            vals[34] = featuresList.countMinusAndPlus;

            vals[35] = featuresList.comparisonDiff;
            vals[36] = featuresList.countNewAdded;
            vals[37] = featuresList.bracketsDiff;

            vals[38] = featuresList.commasDiff;

            // Keyword counts occupy the columns "commentWord" .. "addWord" (39-45).
            // The relation declares seven of them; a longer commentKeywords list
            // overruns the row and fails with ArrayIndexOutOfBoundsException.
            int sizeWords = featuresList.commentKeywords.size();
            for (int i = 0; i < sizeWords; i++) {
                vals[39 + i] = featuresList.commentKeywords.get(i);
            }

            // The class attribute is always the last column of the relation.
            // convertDecomposedGroupToInt returns -1 for labels it does not recognise;
            // such rows are still added, as before.
            vals[vals.length - 1] = convertDecomposedGroupToInt(featuresList.decomposedGroup);

            data.add(new DenseInstance(1.0, vals));
        }

        System.out.println(data.size());

        return data;
    }


    /**
     * Maps a decomposed-group label to the index of the matching nominal value of the
     * "decomposedGroup" class attribute.
     * <p>
     * The ninth nominal value is declared as "LARGERDEFECTS" in
     * createDecomposedGroupInstances, while this converter used to recognise only
     * "LARGER". Both spellings are accepted so that the declared label no longer
     * maps to -1.
     *
     * @param group the decomposed-group label; a null value causes a NullPointerException
     * @return the index 0-8 of the label, or -1 when the label is not recognised
     */
    private int convertDecomposedGroupToInt(String group){
        switch (group) {
            case "DOCUMENTATION":
                return 0;
            case "VISUALREPRESENTATION":
                return 1;
            case "STRUCTURE":
                return 2;
            case "INTERFACE":
                return 3;
            case "LOGIC":
                return 4;
            case "RESOURCE":
                return 5;
            case "CHECK":
                return 6;
            case "SUPPORT":
                return 7;
            case "LARGER":
            case "LARGERDEFECTS":
                return 8;
            default:
                return -1;
        }
    }


    /**
     * Builds the Weka data set whose class attribute is the "type" of a modification.
     * <p>
     * The relation has 30 numeric attributes followed by the nominal class attribute.
     * Entries whose type is null or "UNKNOWN" are skipped. The class index of the
     * returned data set is not set here.
     *
     * @param classifierEntries modifications to convert; each must return a non-null
     *                          feature list from getFeatures()
     * @return the populated data set
     */
    public Instances createTypeInstances(List<Modification> classifierEntries){

        // Nominal values of the class attribute, in the order used by convertToInt.
        final String[] typeLabels = {
                "TEXTUAL", "SUPPORTEDBYLANGUAGE", "VISUALREPRESENTATION", "ORGANIZATION",
                "SOLUTIONAPPROACH", "RESOURCE", "SUPPORT", "CHECK", "INTERFACE", "LOGIC", "LARGER"
        };
        ArrayList<String> classVal = new ArrayList<>();
        for (String label : typeLabels) {
            classVal.add(label);
        }

        // Numeric attributes, in column order.
        final String[] numericNames = {
                "oldLoc", "oldLocComments", "oldLocExec", "oldLocBlank",
                "oldFirstChar", "oldLastChar", "oldCyclomaticComplexity",
                "newLoc", "newLocComments", "newLocExec", "newLocBlank",
                "newFirstChar", "newLastChar", "newCyclomaticComplexity",
                "numberOfAddedWords", "numberOfDeletedWords", "numberOfAddedCharacters",
                "numberOfDeletedCharacters", "levenshteinDistance", "numKeywords", "keyword",
                "oneIf", "brackets",
                "commentWord", "methodWord", "returnWord", "changWord",
                "finalWord", "testWord", "addWord"
        };

        FastVector atts = new FastVector();
        for (String numericName : numericNames) {
            atts.addElement(new Attribute(numericName));
        }
        atts.addElement(new Attribute("type", classVal));

        Instances data = new Instances("MyRelation", atts, 0);

        for (Modification entry : classifierEntries) {
            FeaturesList features = entry.getFeatures();
            if (features.type == null || features.type.equals("UNKNOWN")) {
                continue;
            }

            double[] row = new double[data.numAttributes()];
            int col = 0;

            // Metrics of the old code chunk.
            row[col++] = features.oldLOC;
            row[col++] = features.oldLOCComments;
            row[col++] = features.oldLOCExec;
            row[col++] = features.oldLOCBlank;
            row[col++] = features.oldFirstChar;
            row[col++] = features.oldLastChar;
            row[col++] = features.oldCyclomaticComplexity;

            // Metrics of the new code chunk.
            row[col++] = features.newLOC;
            row[col++] = features.newLOCComments;
            row[col++] = features.newLOCExec;
            row[col++] = features.newLOCBlank;
            row[col++] = features.newFirstChar;
            row[col++] = features.newLastChar;
            row[col++] = features.newCyclomaticComplexity;

            // Textual difference metrics.
            row[col++] = features.numberOfAddedWords;
            row[col++] = features.numberOfDeletedWords;
            row[col++] = features.numberOfAddedCharacters;
            row[col++] = features.numberOfDeletedCharacters;
            row[col++] = features.levenshteinDistance;
            row[col++] = features.numKeywords;
            row[col++] = convertKeyWordToInt(features.keyword);

            row[col++] = features.ifAdded;
            row[col++] = features.brackets;

            // Keyword counts follow directly; the class value is written right after them,
            // so it only lands in the "type" column when the list holds seven values.
            int keywordCount = features.commentKeywords.size();
            for (int k = 0; k < keywordCount; k++) {
                row[col++] = features.commentKeywords.get(k);
            }

            row[col] = convertToInt(features.type);

            data.add(new DenseInstance(1.0, row));
        }

        return data;
    }


    /**
     * Maps a Java keyword to the numeric code used for the "keyword" feature.
     *
     * @param keyword the keyword; a null value causes a NullPointerException
     * @return 1 for "private", 2 for "public", 3 for "protected", 4 for "static",
     *         5 for "final", 6 for "this", 7 for "void", 8 for "volatile",
     *         and 0 for anything else
     */
    private int convertKeyWordToInt(String keyword){
        // The code of a keyword is its position in this table plus one; 0 means "none of these".
        final String[] codedKeywords = {
                "private", "public", "protected", "static", "final", "this", "void", "volatile"
        };
        for (int position = 0; position < codedKeywords.length; position++) {
            if (keyword.equals(codedKeywords[position])) {
                return position + 1;
            }
        }
        return 0;
    }


    /**
     * Maps a type label to the index of the matching nominal value of the "type"
     * class attribute.
     *
     * @param type the type label; a null value causes a NullPointerException
     * @return the index 0-10 of the label, or -1 when the label is not recognised
     */
    private int convertToInt(String type){
        // The position of a label in this table is the code that is returned for it.
        final String[] knownTypes = {
                "TEXTUAL", "SUPPORTEDBYLANGUAGE", "VISUALREPRESENTATION", "ORGANIZATION",
                "SOLUTIONAPPROACH", "RESOURCE", "SUPPORT", "CHECK", "INTERFACE", "LOGIC", "LARGER"
        };
        for (int code = 0; code < knownTypes.length; code++) {
            if (type.equals(knownTypes[code])) {
                return code;
            }
        }
        return -1;
    }

    /**
     * Builds the Weka data set whose class attribute is the "subtype" of a modification.
     * <p>
     * The relation has 22 numeric attributes followed by the nominal class attribute.
     * Entries whose subtype is null or "UNKNOWN" are skipped. The number of rows is
     * printed to standard output before returning. The class index of the returned
     * data set is not set here.
     *
     * @param classifierEntries modifications to convert; each must return a non-null
     *                          feature list from getFeatures()
     * @return the populated data set
     */
    public Instances createSubtypeInstances(List<Modification> classifierEntries){

        // Nominal values of the class attribute, in the order used by convertSubtypeToInt.
        final String[] subtypeLabels = {
                "NAMING", "COMMENTS", "DEBUGINFO", "ELEMENTTYPE", "IMMUTABLE", "VISIBILITY",
                "VOIDPARAMETER", "ELEMENTREFERENCE", "BRACKETUSAGE", "INDENTATION", "BLANKLINE",
                "LONGLINE", "SPACEUSAGE", "GROUPING", "MOVEFUNCTIONALITY", "LONGSUBROUTINE",
                "DEADCODE", "DUPLICATION", "COMPLEXCODE", "STATEMENTISSUES", "CONSISTENCY",
                "SEMANTICDUPLICATION", "SEMANTICDEADCODE", "CHANGEFUNCTION", "USESTANDARDMETHOD",
                "NEWFUNCTIONALITY", "MINOR", "VARIABLEINITIALIZATION", "MEMORYMANAGEMENT",
                "DATA_RESOURCEMANIPULATION", "CHECKFUNCTION", "CHECKVARIABLE", "CHECKUSERINPUT",
                "FUNCTIONCALL", "PARAMETER", "COMPARE", "COMPUTE", "WRONGLOCATION",
                "ALGORITHM_PERFORMANCE", "COMPLETENESS", "GUI", "CHECKOUTCODE", "OTHER"
        };
        ArrayList<String> classVal = new ArrayList<>();
        for (String label : subtypeLabels) {
            classVal.add(label);
        }

        // Numeric attributes, in column order.
        final String[] numericNames = {
                "oldLoc", "oldLocComments", "oldLocExec", "oldLocBlank",
                "oldFirstChar", "oldLastChar", "oldCyclomaticComplexity",
                "newLoc", "newLocComments", "newLocExec", "newLocBlank",
                "newFirstChar", "newLastChar", "newCyclomaticComplexity",
                "numberOfAddedWords", "numberOfDeletedWords", "numberOfAddedCharacters",
                "numberOfDeletedCharacters", "levenshteinDistance", "numKeywords", "keyword",
                "oneIf"
        };

        FastVector atts = new FastVector();
        for (String numericName : numericNames) {
            atts.addElement(new Attribute(numericName));
        }
        atts.addElement(new Attribute("subtype", classVal));

        Instances data = new Instances("MyRelation", atts, 0);

        for (Modification entry : classifierEntries) {
            FeaturesList features = entry.getFeatures();
            if (features.subtype == null || features.subtype.equals("UNKNOWN")) {
                continue;
            }

            double[] row = new double[data.numAttributes()];
            int col = 0;

            // Metrics of the old code chunk.
            row[col++] = features.oldLOC;
            row[col++] = features.oldLOCComments;
            row[col++] = features.oldLOCExec;
            row[col++] = features.oldLOCBlank;
            row[col++] = features.oldFirstChar;
            row[col++] = features.oldLastChar;
            row[col++] = features.oldCyclomaticComplexity;

            // Metrics of the new code chunk.
            row[col++] = features.newLOC;
            row[col++] = features.newLOCComments;
            row[col++] = features.newLOCExec;
            row[col++] = features.newLOCBlank;
            row[col++] = features.newFirstChar;
            row[col++] = features.newLastChar;
            row[col++] = features.newCyclomaticComplexity;

            // Textual difference metrics.
            row[col++] = features.numberOfAddedWords;
            row[col++] = features.numberOfDeletedWords;
            row[col++] = features.numberOfAddedCharacters;
            row[col++] = features.numberOfDeletedCharacters;
            row[col++] = features.levenshteinDistance;
            row[col++] = features.numKeywords;
            row[col++] = convertKeyWordToInt(features.keyword);

            row[col++] = features.ifAdded;

            // Class value goes into the last column ("subtype").
            row[col] = convertSubtypeToInt(features.subtype);

            data.add(new DenseInstance(1.0, row));
        }

        System.out.println(data.size());

        return data;
    }


    /**
     * Maps a subtype label to the index of the matching nominal value of the "subtype"
     * class attribute.
     *
     * @param subtype the subtype label; a null value causes a NullPointerException
     * @return the index 0-42 of the label, or -1 when the label is not recognised
     */
    private int convertSubtypeToInt(String subtype){
        // The position of a label in this table is the code that is returned for it.
        final String[] knownSubtypes = {
                "NAMING",                    // 0
                "COMMENTS",
                "DEBUGINFO",
                "ELEMENTTYPE",
                "IMMUTABLE",
                "VISIBILITY",                // 5
                "VOIDPARAMETER",
                "ELEMENTREFERENCE",
                "BRACKETUSAGE",
                "INDENTATION",
                "BLANKLINE",                 // 10
                "LONGLINE",
                "SPACEUSAGE",
                "GROUPING",
                "MOVEFUNCTIONALITY",
                "LONGSUBROUTINE",            // 15
                "DEADCODE",
                "DUPLICATION",
                "COMPLEXCODE",
                "STATEMENTISSUES",
                "CONSISTENCY",               // 20
                "SEMANTICDUPLICATION",
                "SEMANTICDEADCODE",
                "CHANGEFUNCTION",
                "USESTANDARDMETHOD",
                "NEWFUNCTIONALITY",          // 25
                "MINOR",
                "VARIABLEINITIALIZATION",
                "MEMORYMANAGEMENT",
                "DATA_RESOURCEMANIPULATION",
                "CHECKFUNCTION",             // 30
                "CHECKVARIABLE",
                "CHECKUSERINPUT",
                "FUNCTIONCALL",
                "PARAMETER",
                "COMPARE",                   // 35
                "COMPUTE",
                "WRONGLOCATION",
                "ALGORITHM_PERFORMANCE",
                "COMPLETENESS",
                "GUI",                       // 40
                "CHECKOUTCODE",
                "OTHER"
        };
        for (int code = 0; code < knownSubtypes.length; code++) {
            if (subtype.equals(knownSubtypes[code])) {
                return code;
            }
        }
        return -1;
    }


    /**
     * Joins keywords into one string, appending ";" after every keyword
     * (so the result of a non-empty list ends with ";").
     * <p>
     * A StringBuilder replaces the repeated String.concat calls, which copied the
     * whole accumulated string on every iteration.
     *
     * @param keyWords the keywords to join; may be null
     * @return the joined string, or an empty string when the list is null or empty
     * @throws NullPointerException if the list contains a null entry
     */
    private String convertToString(List<String> keyWords){
        if(keyWords == null) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        for(String keyWord : keyWords){
            // concat (rather than append(keyWord)) keeps failing on a null entry
            // instead of silently writing the text "null".
            joined.append(keyWord.concat(";"));
        }
        return joined.toString();
    }


    /**
     * Builds the Weka data set for the four-way group classification from a reduced
     * feature set.
     * <p>
     * The relation has 34 numeric attributes followed by a nominal class attribute that
     * is named "category" but holds the group label. The keyword code, the bracket flag,
     * the comment word count, the changed-function count and the bracket difference are
     * not part of this reduced set. Entries whose group is null or "UNKNOWN" are skipped.
     * The number of rows is printed to standard output before returning. The class index
     * of the returned data set is not set here.
     *
     * @param classifierEntries modifications to convert; each must return a non-null
     *                          feature list from getFeatures()
     * @return the populated data set
     */
    public Instances createGroupInstancesLessFeatures(List<Modification> classifierEntries){
        // Nominal values of the class attribute, in the order used by convertGroupToInt.
        final String[] groupLabels = {"DOCUMENTATION", "VISUALREPRESENTATION", "STRUCTURE", "FUNCTIONAL"};
        ArrayList<String> classVal = new ArrayList<>();
        for (String label : groupLabels) {
            classVal.add(label);
        }

        // Numeric attributes, in column order.
        final String[] numericNames = {
                "oldLoc", "oldLocComments", "oldLocExec", "oldLocBlank",
                "oldFirstChar", "oldLastChar", "oldCyclomaticComplexity",
                "newLoc", "newLocComments", "newLocExec", "newLocBlank",
                "newFirstChar", "newLastChar", "newCyclomaticComplexity",
                "numberOfAddedWords", "numberOfDeletedWords", "numberOfAddedCharacters",
                "numberOfDeletedCharacters", "levenshteinDistance", "numKeywords",
                "oneIf",
                "cyclomaticDiff", "LOCExecDiff", "LOCDiff", "LOCCommentsDiff",
                "oneIFDel", "Functions added", "Cycles diff", "assignment diff",
                "if diff", "min and p diff",
                "New", "New added",
                "Comma diff"
        };

        FastVector atts = new FastVector();
        for (String numericName : numericNames) {
            atts.addElement(new Attribute(numericName));
        }
        atts.addElement(new Attribute("category", classVal));

        Instances data = new Instances("MyRelation", atts, 0);

        for (Modification entry : classifierEntries) {
            FeaturesList features = entry.getFeatures();
            if (features.group == null || features.group.equals("UNKNOWN")) {
                continue;
            }

            double[] row = new double[data.numAttributes()];
            int col = 0;

            // Metrics of the old code chunk.
            row[col++] = features.oldLOC;
            row[col++] = features.oldLOCComments;
            row[col++] = features.oldLOCExec;
            row[col++] = features.oldLOCBlank;
            row[col++] = features.oldFirstChar;
            row[col++] = features.oldLastChar;
            row[col++] = features.oldCyclomaticComplexity;

            // Metrics of the new code chunk.
            row[col++] = features.newLOC;
            row[col++] = features.newLOCComments;
            row[col++] = features.newLOCExec;
            row[col++] = features.newLOCBlank;
            row[col++] = features.newFirstChar;
            row[col++] = features.newLastChar;
            row[col++] = features.newCyclomaticComplexity;

            // Textual difference metrics.
            row[col++] = features.numberOfAddedWords;
            row[col++] = features.numberOfDeletedWords;
            row[col++] = features.numberOfAddedCharacters;
            row[col++] = features.numberOfDeletedCharacters;
            row[col++] = features.levenshteinDistance;
            row[col++] = features.numKeywords;

            row[col++] = features.ifAdded;

            // Old/new difference metrics.
            row[col++] = features.cyclomaticDiff;
            row[col++] = features.LOCExecDiff;
            row[col++] = features.LOCDiff;
            row[col++] = features.LOCCommentsDiff;

            row[col++] = features.countFunctionsDiff;
            row[col++] = features.countFunctionsAdded;

            row[col++] = features.cyclesDiff;
            row[col++] = features.assignmentDiff;

            row[col++] = features.countIfDiff;
            row[col++] = features.countMinusAndPlus;

            row[col++] = features.comparisonDiff;
            row[col++] = features.countNewAdded;

            row[col++] = features.commasDiff;

            // Class value goes into the last column.
            row[col] = convertGroupToInt(features.group);

            data.add(new DenseInstance(1.0, row));
        }

        System.out.println(data.size());

        return data;
    }

}
