In [1]:
import json
# Load the unverified dataset. The verification loop iterates it as
# {level: {problem_key: problem_data}}.
# JSON text is UTF-8 by specification, so pin the encoding instead of relying
# on the platform's locale default.
with open("final_unverified.json", "r", encoding="utf-8") as dataset_file:
    final_dataset = json.load(dataset_file)

In [2]:
import shutil
import os
import shlex
import time
from subprocess import Popen, PIPE
from threading import Timer

def run_code(code, timeout=10):
    """Write ``code`` to ``tests/test_temp.py`` and run ``pytest ./tests`` on it.

    The ``tests`` directory (relative to the current working directory) is
    deleted and recreated on every call, so any previous contents are lost.

    Args:
        code: Python source text of the test module to execute.
        timeout: Seconds to wait for pytest before killing it. Defaults to 10.

    Returns:
        A 5-tuple ``(completed, stdout, stderr, elapsed, returncode)``:

        * ``completed`` is True only if pytest exited on its own within
          ``timeout``; False on timeout or on an unexpected error.
        * ``stdout`` / ``stderr`` are the captured bytes. On an unexpected
          error they are ``""`` and the exception object respectively.
        * ``elapsed`` is the wall-clock time in seconds.
        * ``returncode`` is pytest's exit status (0 means all tests passed),
          the kill status after a timeout, or -1 on an unexpected error.

    Raises:
        OSError: If the ``pytest`` executable cannot be launched. This is left
            to propagate on purpose: a missing test runner should fail loudly
            rather than make every solution look like a failure.
    """
    # Function-scope import: the file only imports Popen and PIPE from subprocess.
    from subprocess import TimeoutExpired

    # Start from an empty tests directory so stale test files never get collected.
    if os.path.exists("tests"):
        shutil.rmtree("tests")
    os.mkdir("tests")
    # Python source files are read as UTF-8 by default, so write them as UTF-8
    # regardless of the platform's locale encoding.
    with open("tests/test_temp.py", "w", encoding="utf-8") as f:
        f.write(code)

    start_time = time.time()
    proc = Popen(["pytest", "./tests"], stdout=PIPE, stderr=PIPE)
    try:
        stdout, stderr = proc.communicate(timeout=timeout)
        return True, stdout, stderr, time.time() - start_time, proc.returncode
    except TimeoutExpired:
        # communicate() does not kill the child on timeout; do it explicitly and
        # then finish communication to reap the process and collect its output.
        proc.kill()
        stdout, stderr = proc.communicate()
        return False, stdout, stderr, time.time() - start_time, proc.returncode
    except Exception as e:
        # Do not leave a pytest process running behind an unexpected failure.
        proc.kill()
        proc.wait()
        return False, "", e, time.time() - start_time, -1

In [3]:
# Source of the helper placed at the top of every generated pytest module.
# The generated ``run_code`` runs a solution with ``python -c``, feeds the
# sample input on stdin, and returns the stripped stdout (or "" when anything
# was written to stderr). Built once here because it never changes.
HELPER_SOURCE = """import subprocess

def run_code(code, input_string, timeout_duration=5):
    # Prepare the subprocess command to run the Python interpreter
    result = subprocess.run(
        ["python", "-c", code],
        input=input_string,
        text=True,
        capture_output=True,
        timeout=timeout_duration
    )
    if result.stderr:
        print(f"Error: {result.stderr}")
        return ""
    else:
        return result.stdout.replace("\\r","").strip()"""


def _build_test_module(ai_sol, sample_test_cases):
    """Return pytest module source that checks ``ai_sol`` against the sample cases.

    One test function is generated per sample test case. Each asserts that
    running the solution on the case's ``"input"`` yields its ``"output"``.

    Args:
        ai_sol: Source text of the candidate solution.
        sample_test_cases: Iterable of dicts with ``"input"`` and ``"output"``
            string entries.

    Returns:
        The helper source followed by the generated test functions.
    """
    code_literal = repr("\n" + ai_sol)
    test_functions = []
    for i, test in enumerate(sample_test_cases):
        # The generated helper strips the solution's stdout, so the expected
        # text must be stripped as well; otherwise a trailing newline in the
        # dataset makes a correct solution fail.
        expected = test["output"].replace("\r", "").strip()
        assertion = f"assert run_code({code_literal},{test['input']!r})=={expected!r}"
        # NOTE(review): pytest's default discovery only collects functions whose
        # names start with "test". This prefix presumably relies on a project
        # level `python_functions` setting -- confirm it exists, otherwise no
        # test is collected and every solution is rejected.
        test_functions.append(f"\n\ndef tdg_test_auto_runner_{i}():\n\t{assertion}")
    return HELPER_SOURCE + "\n\n".join(test_functions)


# Keep only the AI solutions that pass every sample test case of their problem.
successful_data = {}
for level, problems in final_dataset.items():
    print(level)
    successful_data[level] = {}
    for key, problem_data in problems.items():
        passing_solutions = []
        for ai_sol in problem_data["ai_solutions"]:
            module_source = _build_test_module(ai_sol, problem_data["sample_test_cases"])
            completed, _stdout, _stderr, _elapsed, return_code = run_code(module_source)
            # pytest exits with 0 only when tests were collected and all passed.
            if completed and return_code == 0:
                passing_solutions.append(ai_sol)
        # Store a shallow copy so final_dataset stays untouched and this cell
        # can be re-run against the original, unfiltered solutions.
        successful_data[level][key] = {**problem_data, "ai_solutions": passing_solutions}

0
1
1*
2
2*
3
3*
4
5
6
7


In [4]:
# Persist the verified subset of the dataset as pretty-printed JSON.
with open("final_successful_dataset.json", "w") as output_file:
    json.dump(successful_data, output_file, indent=4)