import sqlite3

import pandas as pd
from tqdm.auto import tqdm
import collections
from NGramGeneration import NGramGeneration

# Open the SQLite database that stores the assert-message data.
db_connection = sqlite3.connect("C:\\Users\\sheha\\Downloads\\AssertMessageData.sqlite")

# Single query shared by every assert method; the method name is supplied as a
# bound parameter instead of being pasted into five copies of the statement.
_TOKENS_QUERY = (
    "select tokens from readability_score as A1 "
    "inner join assert_method as A2 on A1.AssertMethodId=A2.Id "
    "where AssertMethodName = ?;"
)


def _load_tokens(assert_method_name):
    """Return a DataFrame holding the ``tokens`` column for one assert method.

    Args:
        assert_method_name: Value matched against ``AssertMethodName``
            (for example ``'assertEquals'``).

    Returns:
        The result of the join query as a pandas DataFrame.
    """
    return pd.read_sql_query(
        _TOKENS_QUERY, db_connection, params=(assert_method_name,))


try:
    input_data_fail = _load_tokens('fail')
    input_data_equals = _load_tokens('assertEquals')
    input_data_true = _load_tokens('assertTrue')
    input_data_false = _load_tokens('assertFalse')
    input_data_notnull = _load_tokens('assertNotNull')
finally:
    # Every result set has been read into a DataFrame at this point, so the
    # connection is released even if one of the queries raised.
    db_connection.close()
    
def _tokenize_rows(frame):
    """Split each row's comma-separated ``tokens`` string into lower-case tokens.

    The per-row token lists are stored on ``frame`` in a new ``input_tokens``
    column (the frame is modified in place), and all tokens are also returned
    as one flat list in row order.

    Args:
        frame: DataFrame with a ``tokens`` column of comma-separated strings.

    Returns:
        A flat list containing every token of every row.
    """
    # Iterate the column directly rather than using iterrows(): it avoids
    # building a Series per row while still driving the same progress bar.
    token_lists = [
        raw.lower().split(",")
        for raw in tqdm(frame['tokens'], desc="Processing rows", total=len(frame))
    ]
    # dtype=object keeps each cell as a Python list (and keeps the column an
    # object column even when the frame is empty).
    frame['input_tokens'] = pd.Series(token_lists, index=frame.index, dtype=object)
    return [token for tokens in token_lists for token in tokens]


terms_fail = _tokenize_rows(input_data_fail)
terms_equals = _tokenize_rows(input_data_equals)
terms_true = _tokenize_rows(input_data_true)
terms_false = _tokenize_rows(input_data_false)
terms_notnull = _tokenize_rows(input_data_notnull)


ngram_generation = NGramGeneration()


def _report_ngrams(frame, terms, label, top_n):
    """Print the most common terms and emit n-gram frequencies for one method.

    Args:
        frame: DataFrame for one assert method; handed to
            ``generate_bi_trigrams`` and replaced by its return value.
        terms: Flat list of tokens counted for the "most common" printout.
        label: Prefix for the names passed to ``generate_word_frequency``
            (``<label>_bigram`` and ``<label>_unigram``).
        top_n: Number of most common terms to print.

    Returns:
        A ``(counter, frame)`` tuple: the term ``Counter`` and the frame
        returned by ``generate_bi_trigrams``.
    """
    term_counts = collections.Counter(terms)
    print(term_counts.most_common(top_n))
    # NOTE(review): generate_bi_trigrams is expected to supply the
    # 'bigrams_text' column read below - confirm against NGramGeneration.
    enriched = ngram_generation.generate_bi_trigrams(frame)
    ngram_generation.generate_word_frequency(enriched['bigrams_text'], label + '_bigram')
    ngram_generation.generate_word_frequency(enriched['tokens'], label + '_unigram')
    print("---")
    return term_counts, enriched


# 'fail' prints its top 15 terms; every other method prints its top 25.
counter_fail, input_data_fail = _report_ngrams(
    input_data_fail, terms_fail, 'fail', 15)
counter_equals, input_data_equals = _report_ngrams(
    input_data_equals, terms_equals, 'equals', 25)
counter_true, input_data_true = _report_ngrams(
    input_data_true, terms_true, 'true', 25)
counter_false, input_data_false = _report_ngrams(
    input_data_false, terms_false, 'false', 25)
counter_notnull, input_data_notnull = _report_ngrams(
    input_data_notnull, terms_notnull, 'notnull', 25)