#!/usr/bin/env python3

import sys, argparse, os, re
from collect_data import collect
from collections import defaultdict

def find_errors(verbose):
    """Run every sanity check on each job returned by ``collect()``.

    For each job a banner with the job name is printed, followed by the
    output of the individual checks.  ``intensional_diff`` is only run for
    the job named "TFF".

    verbose -- forwarded unchanged to every check.
    """
    # Checks that apply to every job, in the order they are reported.
    common_checks = (
        stopped_early,
        lambdas_in_output,
        unexpected_result,
        sat_and_unsat,
        sat_diff,
    )
    results = collect()
    for job in results:
        job_data = results[job]
        print("+++++++++++++++++++++++++++")
        print(f"+ Job {job}")
        for check in common_checks:
            check(job_data, verbose)
        if job == "TFF":
            intensional_diff(job_data, verbose)

def stopped_early(data, verbose):
    """Report runs that produced no result in under five CPU seconds.

    data    -- mapping config -> benchmark -> record; each record is read
               for "result", "cpu time", "benchmark" and "job_output_file".
    verbose -- when true, print one line per offending run after the count.

    A run counts as stopped early when its result is '--' and its CPU time
    is below 5.
    """
    early = [
        f"early error for {config} in {run['benchmark']} after {run['cpu time']} seconds, see {run['job_output_file']}"
        for config, runs in data.items()
        for run in runs.values()
        if run["result"] == '--' and run["cpu time"] < 5
    ]
    print(f"Stopped early: {len(early)}")
    if verbose and early:
        print("\n".join(early))

def lambdas_in_output(data, verbose):
    """Report job output files that contain the character 'λ'.

    data    -- mapping config -> benchmark -> record; only the
               "job_output_file" path of each record is used.
    verbose -- when true, print one line per hit after the count.

    One hit is recorded for every *line* that contains 'λ', so a file with
    several such lines contributes several times to the reported count.
    """
    errors = []
    for config in data:
        for b in data[config]:
            path = data[config][b]["job_output_file"]
            # Close each file promptly instead of leaking one handle per
            # benchmark.  Decode explicitly as UTF-8 so that finding 'λ' does
            # not depend on the locale; undecodable bytes are replaced rather
            # than aborting the whole report.
            with open(path, "r", encoding="utf-8", errors="replace") as output:
                for line in output:
                    if 'λ' in line:
                        errors.append(f"{path} contains λ")
    print(f"Contains lambdas: {len(errors)}")
    if verbose:
        for error in errors:
            print(error)

def unexpected_result(data, verbose):
    """Print, per configuration, how many results disagree with expectations.

    data    -- mapping config -> benchmark -> record; each record is read
               for "result" and, when the result is a definite answer,
               for "expected".
    verbose -- accepted for a uniform signature; not used here.

    Results 'Theorem'/'Unsatisfiable' form the "theorem" group and
    'Satisfiable'/'CounterSatisfiable'/'GaveUp' form the "sat" group; any
    other result is ignored.  Within a group a record is tallied as

      unknown_* when the expected status is "Unknown",
      open_*    when the expected status is "Open",
      false_*   when the expected status is not in the group's list of
                compatible statuses.

    Note that "Open" is not in either compatible list, so such a record is
    counted under both open_* and false_*.
    """
    proved = ('Theorem', 'Unsatisfiable')
    refuted = ('Satisfiable', 'CounterSatisfiable', 'GaveUp')
    compatible = {
        "theorem": ("Theorem", "Unsatisfiable", "ContradictoryAxioms", "Unknown"),
        "sat": ("CounterSatisfiable", "Satisfiable", "Unknown"),
    }
    # Column order of the printed report.
    columns = (
        "unknown_theorem", "unknown_sat",
        "open_theorem", "open_sat",
        "false_theorem", "false_sat",
    )
    tallies = {name: defaultdict(list) for name in columns}

    for config, runs in data.items():
        for record in runs.values():
            answer = record["result"]
            if answer in proved:
                group = "theorem"
            elif answer in refuted:
                group = "sat"
            else:
                continue
            # Only look up "expected" for definite answers.
            expected = record["expected"]
            if expected not in compatible[group]:
                tallies[f"false_{group}"][config].append(record)
            if expected == "Unknown":
                tallies[f"unknown_{group}"][config].append(record)
            if expected == "Open":
                tallies[f"open_{group}"][config].append(record)

    for config in data:
        counts = " | ".join(
            f"{name} {len(tallies[name][config]):6}" for name in columns
        )
        print(f"{config:{30}}: {counts}")

def sat_and_unsat(data, verbose):
    """Report benchmarks that one configuration proves and another refutes.

    One prover cannot say unsat if another says sat.  Every ordered pair of
    configurations is compared, except that

      * "original_fo.sh" and "original_fo_kbo.sh" never take part, and
      * a configuration whose name contains "ext" is only compared with
        other configurations whose name contains "ext" (and vice versa).

    data    -- mapping config -> benchmark -> record; each record is read
               for "result" and "cpu time".
    verbose -- when true, list the conflicting benchmarks under each
               reported pair.
    """
    excluded = ("original_fo.sh", "original_fo_kbo.sh")
    unsat_answers = ('Theorem', 'Unsatisfiable')
    sat_answers = ("Satisfiable", "CounterSatisfiable", "GaveUp")

    for unsat_cfg in data:
        unsat_runs = data[unsat_cfg]
        for sat_cfg in data:
            sat_runs = data[sat_cfg]
            if unsat_cfg in excluded or sat_cfg in excluded:
                continue
            # Both names must agree on whether they mention "ext".
            if ("ext" in unsat_cfg) != ("ext" in sat_cfg):
                continue
            clashes = [
                f"{b} in {unsat_runs[b]['cpu time']} vs {sat_runs[b]['cpu time']}"
                for b in unsat_runs
                if unsat_runs[b]["result"] in unsat_answers
                and sat_runs[b]["result"] in sat_answers
            ]
            if not clashes:
                continue
            print(f"Sat {sat_cfg} and unsat {unsat_cfg} : {len(clashes)}")
            if verbose:
                print("\n".join(clashes))

def intensional_diff(data, verbose):
    """Report benchmarks solved by one fixed configuration but not the other.

    Intensional calculi should coincide on TFF.  A benchmark is reported
    when "original_fo_kbo.sh" answers 'Theorem' or 'Unsatisfiable' while
    "original_purification_int_kbo.sh" ends with "ResourceOut" or "--".

    data    -- mapping config -> benchmark -> record; must contain both
               configurations named above.  Each record is read for
               "result" and "cpu time".
    verbose -- when true, list the differing benchmarks.
    """
    config1 = "original_fo_kbo.sh"
    config2 = "original_purification_int_kbo.sh"
    solved_by, missed_by = data[config1], data[config2]
    solved = ('Theorem', 'Unsatisfiable')
    unfinished = ("ResourceOut", "--")

    diffs = [
        f"{b} in {solved_by[b]['cpu time']} vs {missed_by[b]['cpu time']}"
        for b in solved_by
        if solved_by[b]["result"] in solved
        and missed_by[b]["result"] in unfinished
    ]
    if not diffs:
        return
    print(f"-- {config2} and Unsat {config1} : {len(diffs)}")
    if verbose:
        print("\n".join(diffs))

def sat_diff(data, verbose):
    """Report benchmarks where "app_fo.sh" saturates but the other run does not finish.

    Why does app encode saturate more often?  A benchmark is reported when
    "app_fo.sh" answers "Satisfiable", "CounterSatisfiable" or "GaveUp"
    while "original_supatvars_int.sh" ends with "ResourceOut" or "--".

    data    -- mapping config -> benchmark -> record; must contain both
               configurations named above.  Each record is read for
               "result" and "cpu time".
    verbose -- when true, list the differing benchmarks.
    """
    config1 = "app_fo.sh"
    config2 = "original_supatvars_int.sh"
    saturating, other = data[config1], data[config2]
    saturated = ("Satisfiable", "CounterSatisfiable", "GaveUp")
    unfinished = ("ResourceOut", "--")

    diffs = [
        f"{b} in {saturating[b]['cpu time']} vs {other[b]['cpu time']}"
        for b in saturating
        if saturating[b]["result"] in saturated
        and other[b]["result"] in unfinished
    ]
    if not diffs:
        return
    print(f"GaveUp {config1} and -- {config2} : {len(diffs)}")
    if verbose:
        print("\n".join(diffs))

def parse_cli_args(argv=None):
    """Parse the command line of this script.

    argv -- list of argument strings; ``None`` means ``sys.argv[1:]``.

    Returns the argparse namespace; its ``verbose`` attribute is a bool.
    ``--verbose`` may be given bare (meaning true) or with an explicit
    value such as ``--verbose false``.
    """
    def str_to_bool(text):
        # ``type=bool`` would turn every non-empty string, including
        # "False", into True; interpret the text explicitly instead.
        lowered = text.strip().lower()
        if lowered in ("1", "true", "t", "yes", "y", "on"):
            return True
        if lowered in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean, got {text!r}")

    p = argparse.ArgumentParser('find errors')
    p.add_argument('--verbose', type=str_to_bool, nargs="?",
                    help='verbose mode', default=False, const=True)
    return p.parse_args(argv)


if __name__ == "__main__":
    find_errors(parse_cli_args().verbose)
