#!/usr/bin/env python3

from IPython.display import display
from multiprocessing import Pool
from IPython import embed
from capstone import *
import pandas as pd
import angr
import sys
import os

# Mnemonics that terminate the forward scan started after a `swapgs`.
_SWAPGS_SCAN_STOPPERS = ('ret', 'iretq', 'iret', 'sysret', 'call', 'swapgs', 'jne', 'je')

# Row types whose verdict is fixed and does not depend on the gadget text.
_FIXED_VERDICTS = {'iretq': True, 'clac': False}

# Substrings accepted in the final instruction of a gadget.
_ACCEPTED_ENDINGS = ('ret', 'jmp', 'call')


def _swapgs_loads_rsp_from_gs(proj, md, addr):
    """Report whether a `mov rsp, qword ptr gs:...` follows the swapgs at *addr*.

    Up to 0x100 bytes starting at *addr* are disassembled. The first decoded
    instruction (the one located at *addr*) is skipped, and the scan gives up
    at the first mnemonic listed in ``_SWAPGS_SCAN_STOPPERS``.
    """
    window = proj.loader.memory.load(addr, 0x100)
    decoded = list(md.disasm(window, addr))
    for insn in decoded[1:]:
        if insn.mnemonic in _SWAPGS_SCAN_STOPPERS:
            return False
        if insn.mnemonic == 'mov' and 'rsp, qword ptr gs:' in insn.op_str:
            print("MATCHED")
            return True
    return False


def _inspect_gadget(kind, pieces):
    """Return ``(overwritten, valid_end)`` for a gadget split into *pieces*.

    ``overwritten`` is set when any piece after the first contains *kind*
    again, or - for a 'stac' gadget - contains 'clac'. ``valid_end`` is set
    when the last piece contains 'ret', 'jmp' or 'call'. Both checks are
    plain substring tests on the text.
    """
    followers = pieces[1:]
    overwritten = any(kind in piece for piece in followers)
    if kind == 'stac' and any('clac' in piece for piece in followers):
        overwritten = True
    valid_end = any(ending in pieces[-1] for ending in _ACCEPTED_ENDINGS)
    return overwritten, valid_end


def analyze_dump(task):
    """Recompute the 'validity' column of one CSV and write the result.

    *task* is a dict with the keys 'csv_path' (input CSV), 'dump_path'
    (binary dump loaded as a flat amd64 blob at base 0xffffffff81000000)
    and 'out_path' (CSV to write). Nothing is done when 'out_path' already
    exists. The CSV rows are read through the columns 'type', 'address',
    'validity' and 'asm'.

    Returns None; the updated frame is displayed and saved to 'out_path'.
    """
    csv_path = task['csv_path']
    dump_path = task['dump_path']
    out_path = task['out_path']

    # Guard clause: an existing output file means this dump was handled before.
    if os.path.exists(out_path):
        print("Already analyzed:", csv_path)
        return
    print("Analyzing:", csv_path, dump_path)

    df = pd.read_csv(csv_path)

    loader_opts = {'backend': 'blob', 'arch': 'amd64', 'base_addr': 0xffffffff81000000}
    proj = angr.Project(dump_path, main_opts=loader_opts, auto_load_libs=False)
    md = Cs(CS_ARCH_X86, CS_MODE_64)

    verdicts = []
    for row in range(len(df)):
        kind = df.loc[row, 'type']

        # A swapgs followed by a stack reload from gs: is accepted outright;
        # otherwise the row falls through to the generic checks below.
        if kind == 'swapgs':
            start = int(df.loc[row, 'address'], 16)
            if _swapgs_loads_rsp_from_gs(proj, md, start):
                verdicts.append(True)
                continue

        # Rows already marked invalid stay invalid. The equality test is kept
        # on purpose: a missing (NaN) value does not compare equal to False.
        if df.loc[row, 'validity'] == False:  # noqa: E712
            verdicts.append(False)
            continue

        if kind in _FIXED_VERDICTS:
            verdicts.append(_FIXED_VERDICTS[kind])
            continue

        pieces = df.loc[row, 'asm'].split(';')
        overwritten, valid_end = _inspect_gadget(kind, pieces)

        verdict = valid_end and not overwritten
        verdicts.append(verdict)
        print(verdict, valid_end, overwritten, pieces)

    df['validity'] = verdicts
    display(df)

    df.to_csv(out_path)

def main(argc, argv):
    """Build one analysis task per dump file and run them in a process pool.

    Args:
        argc: Argument count as passed by the caller. Kept for interface
            compatibility; the length of *argv* is what is actually checked.
        argv: Command line in ``sys.argv`` layout:
            ``[prog, csvs_dir, dumps_dir, out_dir]``.

    For every entry ``<name>`` in ``dumps_dir`` the task reads
    ``csvs_dir/<name>_new.csv`` and writes ``out_dir/<name>_new.csv``.
    Entries without a matching CSV are skipped with a message on stderr so
    that one stray file does not abort the whole batch.

    Raises:
        SystemExit: when fewer than three path arguments are supplied.
    """
    if len(argv) < 4:
        prog = argv[0] if argv else 'prog'
        sys.exit(f"usage: {prog} <csvs_dir> <dumps_dir> <out_dir>")

    csvs_path, dumps_path, out_path = argv[1:4]

    # Sorted so the task order (and therefore the log output) is reproducible;
    # os.listdir returns entries in arbitrary order.
    dump_files = sorted(os.listdir(dumps_path))

    # Workers write straight into out_path, so it has to exist up front.
    os.makedirs(out_path, exist_ok=True)

    tasks = []
    for dump_file in dump_files:
        print(dump_file)
        csv_name = dump_file + '_new.csv'
        csv_path = os.path.join(csvs_path, csv_name)
        if not os.path.isfile(csv_path):
            print("Skipping (no CSV found):", csv_path, file=sys.stderr)
            continue
        tasks.append({
            'csv_path': csv_path,
            'dump_path': os.path.join(dumps_path, dump_file),
            'out_path': os.path.join(out_path, csv_name),
        })

    print(tasks)
    with Pool(3) as p:
        p.map(analyze_dump, tasks)

if __name__ == "__main__":
    # Script entry point: forward the raw command line to main().
    cli_args = sys.argv
    main(len(cli_args), cli_args)
