#!/usr/bin/env python3
import os
import json
import subprocess
import glob
from pathlib import Path
import pprint
from datetime import datetime
import sys
import argparse
from PIL import Image
import numpy as np

# Directory holding one sub-directory per test case, resolved next to this script.
DATASET_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "dataset")
# Ollama model tags this tool can run.
AVAILABLE_MODELS = ["gemma3:12b", "gemma3:4b"]
# Model used by default (index 1 -> "gemma3:4b"); main() rebinds this global.
MODEL = AVAILABLE_MODELS[1]
# Directory where analysis result JSON files are written, next to this script.
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "analysis_results")

# Import-time side effect: make sure the results directory exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Cache filled by compile_all_categories(); stays empty until a scan finds categories.
ALL_CATEGORIES = []

def compile_all_categories():
    """Return the sorted union of the "categories" lists of every test case.

    Each sub-directory of DATASET_DIR is checked for a metadata.json; unreadable
    files only produce a warning. A non-empty result is cached in the
    module-level ALL_CATEGORIES and returned directly on later calls.
    """
    global ALL_CATEGORIES

    # Serve from the cache once a previous scan has found something.
    if ALL_CATEGORIES:
        return ALL_CATEGORIES

    print(f"Compiling global category list from all test cases in {DATASET_DIR}...")

    candidate_dirs = [
        entry
        for entry in glob.glob(os.path.join(DATASET_DIR, "*"))
        if os.path.isdir(entry)
    ]

    collected = set()
    for case_dir in candidate_dirs:
        meta_file = os.path.join(case_dir, "metadata.json")
        if not os.path.exists(meta_file):
            continue
        try:
            with open(meta_file, 'r') as handle:
                loaded = json.load(handle)
            collected.update(loaded.get("categories", []))
        except Exception as e:
            # Best effort: one broken test case must not abort the whole scan.
            print(f"Warning: Could not read metadata from {case_dir}: {e}")

    ALL_CATEGORIES = sorted(collected)
    summary = ', '.join(ALL_CATEGORIES)
    print(f"Found {len(ALL_CATEGORIES)} unique categories across all test cases: {summary}")
    return ALL_CATEGORIES

# Prompt templates that build_prompt() fills in with str.format().
# Single-brace names such as {reference} are placeholders; doubled braces
# ({{ and }}) are literal braces that survive formatting.
PROMPT_COMPONENTS = {
    # Introduces the three images; placeholders: reference, failure, difference.
    "image_description": """
Compare these three related screenshot images:

1. REFERENCE: {reference} - This is the baseline/expected snapshot (correct UI state)
2. FAILURE: {failure} - This is the current snapshot we're testing (potentially incorrect UI state)
3. DIFF: {difference} - This highlights pixel differences between reference and failure
""",

    # Lists the allowed categories; placeholders: categories, categories_context,
    # important_note.
    "category_instructions": """
POSSIBLE CATEGORIES: {categories}
{categories_context}

These categories represent potential reasons for the failure. You should select one or more of these categories that best describe the differences you observe.

If you believe there's a reason that doesn't fit any of the provided categories, you can create a new category with the format "UNKNOWN-<your reason>".
{important_note}
""",

    # Placeholder: ignore_reason.
    # NOTE(review): build_prompt() in this file composes its own ignore text and
    # does not read this entry - confirm whether anything else still uses it.
    "ignore_instructions": """
IMPORTANT: In your analysis, IGNORE the following aspect of the differences: "{ignore_reason}"

Assume that this difference is expected/acceptable, and focus on any other differences that might exist.
Even if "{ignore_reason}" appears to be the only difference, look carefully for any other subtle changes.
""",

    # Defines the two requested metrics; placeholder: ignore_clause.
    "analysis_instructions": """
Analyze why this test failed by examining all three images carefully{ignore_clause}.

DEFINITIONS:
- Pixel-wise difference: Quantitative measure (0-1) of visual differences detected in the diff image
  - 0 means identical images
  - 1 means completely different images
- Semantic difference: Proportion (0-1) of UI components/features that differ in function or meaning
  - 0 means all components in reference image are present and correct in the failure image
  - 1 means all components are missing or wrong
""",

    # No placeholders; build_prompt() passes this text as important_note in
    # "analyze_and_ignore" mode.
    "analyze_and_ignore_instructions": """
IMPORTANT: Follow these steps in order:
1. First, identify ALL categories of differences between the reference and failure images
2. ONLY use categories from the provided POSSIBLE CATEGORIES list
3. If you detect a difference that doesn't match any category in the list, use the "UNKNOWN-<reason>" format
4. Then, determine which category represents the MOST significant difference
5. In your final JSON response, EXCLUDE that most significant category from the "reasons" list
6. Include all other categories of differences in your "reasons" list

This approach helps us identify secondary issues that might be present beyond the main difference.
DO NOT create new category names that aren't in the POSSIBLE CATEGORIES list or in UNKNOWN-<reason> format.
""",

    # Describes the JSON reply shape; placeholder: ignore_explanation.
    "response_format": """
Provide your analysis as JSON with these fields:
{{
  "reasons": [
    "category1",
    "category2",
    "UNKNOWN-reason"
  ],
  "pixel_diff": 0.XX,
  "semantic_diff": 0.XX,
  "affected_elements": [
  ],
  "explanation": "Concise detailed explanation of the key differences{ignore_explanation}"
}}

Only respond with valid JSON. Be concise but thorough in your analysis.
"""
}

def build_prompt(mode, **kwargs):
    """Assemble the complete prompt text for one analysis run.

    Args:
        mode: Analysis mode. "analyze_and_ignore", "ignore_from_analysis" and
            "ignore_from_metadata" add mode-specific guidance; any other value
            produces the plain prompt.
        **kwargs: Optional "reference", "failure" and "difference" image paths,
            "categories" (a ", "-separated string of test-specific categories)
            and "ignore_reason" (only used by the two ignore_from_* modes).

    Returns:
        The prompt sections joined by newlines. The list of sections is also
        printed to stdout.
    """
    # Mode-specific guidance shown under the category list; other modes get none.
    context_by_mode = {
        "analyze_and_ignore": """
These categories represent potential types of UI differences. You should:
1. First identify ALL categories of differences you observe
2. Then determine which is the most significant difference
3. Use UNKNOWN-<reason> format for any new types of differences you discover
4. Finally, when reporting the `reasons` or other metrics in your JSON response, Do another pass to list all observed differences *except* for the one you identified as most significant. It may be the case that there are no defects left.
This helps us find secondary issues that might be masked by the primary difference.""",
        "ignore_from_analysis": """
IMPORTANT: These categories were identified by a previous analysis of the same images.
The previous analysis found these specific differences between the reference and failure images.
You should:
1. Understand these previously identified categories
2. Focus on finding NEW differences after ignoring the specified category
3. Use UNKNOWN-<reason> format for any new types of differences you discover
This helps us validate the previous analysis and find issues that might have been masked.""",
        "ignore_from_metadata": """
IMPORTANT: These categories are ground truth labels that describe known differences.
They represent verified UI differences between the reference and failure images.
You should:
1. Understand these verified categories
2. Focus on finding additional differences after ignoring the specified category
3. Use UNKNOWN-<reason> format for any new types of differences you discover
This helps us find additional issues beyond the known differences.""",
    }

    sections = [
        PROMPT_COMPONENTS["image_description"].format(
            reference=kwargs.get("reference", ""),
            failure=kwargs.get("failure", ""),
            difference=kwargs.get("difference", ""),
        )
    ]

    categories_context = context_by_mode.get(mode, "")
    if mode == "analyze_and_ignore":
        important_note = PROMPT_COMPONENTS["analyze_and_ignore_instructions"]
    else:
        important_note = ""

    global_categories = compile_all_categories()

    # Categories attached to this particular test, if the caller supplied any.
    raw_categories = kwargs.get("categories")
    observed = raw_categories.split(", ") if raw_categories else []
    if observed:
        categories_context += f"\n\nNote: For this specific test, we previously observed these categories: {', '.join(observed)}"

    if global_categories:
        categories_text = ", ".join(global_categories)
    else:
        categories_text = "No predefined categories available. Use UNKNOWN-<reason> format to describe the changes."
    sections.append(
        PROMPT_COMPONENTS["category_instructions"].format(
            categories=categories_text,
            categories_context=categories_context,
            important_note=important_note,
        )
    )

    # The ignore section is only emitted when both the mode and a reason call for it.
    ignoring = mode in ("ignore_from_analysis", "ignore_from_metadata") and "ignore_reason" in kwargs
    if ignoring:
        if mode == "ignore_from_analysis":
            ignore_source = "previous analysis"
        else:
            ignore_source = "ground truth metadata"
        reason = kwargs["ignore_reason"]
        sections.append(f"""
IMPORTANT: The following aspect was identified in {ignore_source} as a key difference: "{reason}"

You should:
1. Understand this identified difference
2. COMPLETELY IGNORE this aspect in your analysis
3. Look carefully for ANY other differences that might exist
4. Pay special attention to subtle changes that might have been overshadowed

Even if this seems to be the only difference, examine the images thoroughly for other changes.""")

    ignore_clause = ", excluding the aspect mentioned above" if ignoring else ""
    ignore_explanation = ", excluding the ignored aspect" if ignoring else ""

    sections.append(
        PROMPT_COMPONENTS["analysis_instructions"].format(ignore_clause=ignore_clause)
    )
    sections.append(
        PROMPT_COMPONENTS["response_format"].format(ignore_explanation=ignore_explanation)
    )
    print(sections)

    return "\n".join(sections)

def prettify_json(json_data):
    """Render *json_data* as 2-space indented JSON text.

    When the optional ``colorama`` package can be imported the text is wrapped
    in green colour codes; otherwise the plain text is returned.
    """
    rendered = json.dumps(json_data, indent=2)
    try:
        import colorama
        colorama.init()
        return f"{colorama.Fore.GREEN}{rendered}{colorama.Style.RESET_ALL}"
    except ImportError:
        # colorama is optional; fall back to uncoloured output.
        return rendered

def clear_screen():
    """Clear the terminal using ``cls`` on Windows and ``clear`` elsewhere."""
    command = 'cls' if os.name == 'nt' else 'clear'
    os.system(command)

def calculate_pixel_difference(reference_path, failure_path):
    """Return the mean absolute per-channel difference of two images in [0, 1].

    Both images are converted to RGB before comparison.

    Args:
        reference_path: Path of the reference image.
        failure_path: Path of the image being compared against the reference.

    Returns:
        0.0 for identical images, up to 1.0 for maximally different ones;
        1.0 when the two images have different dimensions; None (after
        printing a warning) when either image cannot be loaded or compared.
    """
    try:
        # Context managers release the underlying file handles promptly.
        with Image.open(reference_path) as ref_img, Image.open(failure_path) as fail_img:
            # RGB pixel data is uint8. Subtracting uint8 arrays wraps modulo 256
            # (10 - 20 == 246), so widen to a signed type before taking the
            # difference.
            ref_arr = np.array(ref_img.convert('RGB')).astype(np.int16)
            fail_arr = np.array(fail_img.convert('RGB')).astype(np.int16)

        if ref_arr.shape != fail_arr.shape:
            return 1.0

        pixel_diff = np.mean(np.abs(ref_arr - fail_arr) / 255.0)
        return float(pixel_diff)
    except Exception as e:
        print(f"  Warning: Could not calculate pixel difference: {e}")
        return None

def get_model_filename(mode, model):
    """Build a fresh result path ``<mode>_<model>_<timestamp>.json`` in OUTPUT_DIR.

    Colons in the model tag are replaced by underscores; the timestamp is the
    current local time formatted as YYYYMMDD_HHMMSS.
    """
    safe_model = model.replace(":", "_")
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{mode}_{safe_model}_{stamp}.json"
    return os.path.join(OUTPUT_DIR, filename)

def find_latest_analysis_file(mode, model):
    """Return the most recently modified result file for *mode* and *model*.

    Files are matched by the ``<mode>_<model>_*.json`` pattern inside
    OUTPUT_DIR (colons in the model tag become underscores). Returns None when
    nothing matches.
    """
    safe_model = model.replace(":", "_")
    candidates = glob.glob(os.path.join(OUTPUT_DIR, f"{mode}_{safe_model}_*.json"))
    if candidates:
        # Newest by modification time.
        return max(candidates, key=os.path.getmtime)
    return None

def save_analysis_results(test_dir, analysis_data, mode):
    """Write one analysis to a brand-new result file and return its path.

    The file name comes from get_model_filename() using the model recorded in
    ``analysis_data["_metadata"]["model"]``. The file content is a JSON object
    with a single entry keyed by *test_dir*.
    """
    target_path = get_model_filename(mode, analysis_data["_metadata"]["model"])

    with open(target_path, 'w') as out:
        json.dump({test_dir: analysis_data}, out, indent=2)

    print(f"  📊 Saved {mode} analysis for {test_dir} to {target_path}")
    return target_path

def append_to_analysis_file(test_dir, analysis_data, mode, model):
    """Store an analysis under *test_dir* in the latest file for this mode/model.

    If no such file exists, or reading/updating it fails, a new file is
    created through save_analysis_results(). Returns the path that was written.
    """
    existing = find_latest_analysis_file(mode, model.replace(":", "_"))

    if not (existing and os.path.exists(existing)):
        return save_analysis_results(test_dir, analysis_data, mode)

    try:
        with open(existing, 'r') as src:
            stored = json.load(src)

        # Add or overwrite this test's entry, then rewrite the whole file.
        stored[test_dir] = analysis_data

        with open(existing, 'w') as dst:
            json.dump(stored, dst, indent=2)

        print(f"  📊 Updated {mode} analysis for {test_dir} in {existing}")
        return existing
    except Exception as e:
        print(f"  ⚠️ Error updating existing file: {e}")

    # Updating failed: fall back to a fresh file.
    return save_analysis_results(test_dir, analysis_data, mode)

def load_latest_analysis(mode, model=None):
    """Load the most recent saved analysis file(s) for *mode*.

    Args:
        mode: Mode prefix of the result files to look for.
        model: Model tag. When given, only that model's latest file is loaded;
            when omitted (or falsy), every entry of AVAILABLE_MODELS is tried.

    Returns:
        With *model*: the decoded JSON content of the latest file, or ``{}``
        when there is no file or it cannot be read.
        Without *model*: a dict mapping each model tag that has a readable
        file to that file's decoded content (possibly empty).
    """
    if model:
        filepath = find_latest_analysis_file(mode, model)
        if filepath and os.path.exists(filepath):
            try:
                with open(filepath, 'r') as f:
                    return json.load(f)
            except Exception as e:
                print(f"Warning: Could not read analysis file {filepath}: {e}")
        return {}

    model_files = {}
    for available_model in AVAILABLE_MODELS:
        filepath = find_latest_analysis_file(mode, available_model)
        if filepath and os.path.exists(filepath):
            try:
                with open(filepath, 'r') as f:
                    model_files[available_model] = json.load(f)
            except Exception as e:
                # Still best effort, but report the skipped file the same way
                # the single-model branch does instead of hiding the failure.
                print(f"Warning: Could not read analysis file {filepath}: {e}")
    return model_files

def process_test_with_mode(test_dir, mode="analyze", original_analysis=None):
    """Analyze one test case with the current MODEL and persist the result.

    Args:
        test_dir: Directory expected to contain reference.png, failure.png,
            diff.png and metadata.json.
        mode: "analyze", "ignore_from_analysis", "ignore_from_metadata" or
            "analyze_and_ignore" ("_analyze_with_fallback" is also recognised
            by the metadata/summary branches below).
        original_analysis: Earlier analysis dict; only read in
            "ignore_from_analysis" mode, where the first entry of its
            "reasons" becomes the reason to ignore.

    Returns:
        The model's parsed JSON answer with an added "_metadata" entry, or
        None when required files are missing, no reason to ignore could be
        extracted, Ollama failed, or no valid JSON could be parsed.

    Side effects: temporarily changes the working directory to *test_dir*,
    runs ``ollama run``, stores the result via append_to_analysis_file(),
    writes raw_output_<mode>.txt into *test_dir* when the reply cannot be
    parsed, and prints progress to stdout.
    """
    dir_name = os.path.basename(test_dir)
    print(f"\n{'='*80}\nProcessing {dir_name} with mode: {mode}...\n{'='*80}")
    
    reference_path = os.path.join(test_dir, "reference.png")
    failure_path = os.path.join(test_dir, "failure.png")
    diff_path = os.path.join(test_dir, "diff.png")
    metadata_path = os.path.join(test_dir, "metadata.json")
    
    # All four inputs are mandatory; report which ones are missing and skip.
    if not all(os.path.exists(path) for path in [reference_path, failure_path, diff_path, metadata_path]):
        print(f"  Missing required files in {dir_name}, skipping.")
        print(f"  - Reference: {'✅' if os.path.exists(reference_path) else '❌'}")
        print(f"  - Failure: {'✅' if os.path.exists(failure_path) else '❌'}")
        print(f"  - Diff: {'✅' if os.path.exists(diff_path) else '❌'}")
        print(f"  - Metadata: {'✅' if os.path.exists(metadata_path) else '❌'}")
        return None
    
    # Test-specific categories; an unreadable metadata file degrades to "no categories".
    try:
        with open(metadata_path, 'r') as f:
            metadata = json.load(f)
        categories = metadata.get("categories", [])
        categories_str = ", ".join(categories) if categories else "No predefined categories available. Use UNKNOWN-<reason> format to describe the changes."
    except Exception as e:
        print(f"  Error reading metadata.json: {e}")
        categories_str = "No predefined categories available. Use UNKNOWN-<reason> format to describe the changes."
        categories = []
    
    # Locally computed value, stored later under _metadata.ground_truth and
    # compared with the model's own "pixel_diff".
    actual_pixel_diff = calculate_pixel_difference(reference_path, failure_path)
    print(f"  Calculated pixel difference: {actual_pixel_diff:.3f}" if actual_pixel_diff is not None else "  Could not calculate pixel difference")
    
    # Choose the reason to ignore: first reason of the earlier analysis, or
    # first category of the metadata, depending on the mode.
    ignore_reason = None
    if mode == "ignore_from_analysis" and original_analysis:
        if 'reasons' in original_analysis:
            reasons = original_analysis['reasons']
            # Tolerate a scalar "reasons" value by wrapping it in a list.
            if not isinstance(reasons, list):
                reasons = [reasons] if reasons else []
            
            if reasons:
                ignore_reason = reasons[0]
                print(f"  🔍 Ignoring reason from analysis: '{ignore_reason}'")
            else:
                print(f"  ⚠️ Empty reasons list in original analysis for {dir_name}")
        else:
            print(f"  ❌ No 'reasons' key found in original analysis for {dir_name}")
            
        if not ignore_reason:
            print(f"  ❌ Failed to extract a reason to ignore from original analysis. Skipping.")
            return None
    elif mode == "ignore_from_metadata" and categories:
        # NOTE(review): when metadata has no categories this branch is skipped,
        # ignore_reason stays None and the run continues with nothing ignored.
        ignore_reason = categories[0]
        print(f"  🔍 Ignoring first category from metadata: '{ignore_reason}'")
    
    has_single_category = len(categories) == 1
    if has_single_category and mode in ["ignore_from_analysis", "ignore_from_metadata"]:
        print(f"  ℹ️ Note: Only one category available ({categories[0]}) - will look for new differences")
    elif mode == "analyze_and_ignore":
        if has_single_category:
            print(f"  ℹ️ Note: Only one category available ({categories[0]}) - will look for additional differences")
    
    # The abspath() calls below resolve against the working directory, so work
    # from inside the test directory; the finally clause restores the old one.
    current_dir = os.getcwd()
    os.chdir(test_dir)
    
    try:
        # NOTE(review): with no metadata categories, categories_str is the
        # "No predefined categories..." sentence, and build_prompt() will list
        # that sentence as the previously observed categories - confirm intent.
        prompt_params = {
            "reference": os.path.abspath("reference.png"),
            "failure": os.path.abspath("failure.png"), 
            "difference": os.path.abspath("diff.png"),
            "categories": categories_str
        }
        
        if ignore_reason:
            prompt_params["ignore_reason"] = ignore_reason
        
        prompt = build_prompt(mode, **prompt_params)
        
        all_possible_categories = compile_all_categories()
        
        print(f"  Running analysis with Ollama model {MODEL}...")
        print(f"  Test-specific categories: {categories_str}")
        print(f"  Using global category list with {len(all_possible_categories)} categories")
        print(f"  Analysis mode: {mode}")
        
        prompt = f"IMPORTANT: The image files are located at the specified absolute paths. Process these files directly.\n\n{prompt}"
        
        try:
            # check=True raises CalledProcessError on a non-zero exit status;
            # the elapsed wall-clock time is recorded as the response time.
            start_time = datetime.now()
            result = subprocess.run(
                ["ollama", "run", MODEL, prompt],
                capture_output=True,
                text=True,
                check=True
            )
            end_time = datetime.now()
            response_time = (end_time - start_time).total_seconds()
            
            try:
                # Take everything from the first '{' to the last '}' as the JSON
                # payload, discarding any text around it.
                output = result.stdout.strip()
                start = output.find('{')
                end = output.rfind('}') + 1
                if start >= 0 and end > start:
                    json_str = output[start:end]
                    parsed = json.loads(json_str)
                    
                    # Normalise a scalar "reasons" value to a list.
                    if not isinstance(parsed.get("reasons", []), list):
                        parsed["reasons"] = [parsed["reasons"]] if parsed.get("reasons") else []
                    
                    # Reasons outside the global category list are kept, but
                    # rewritten into the UNKNOWN-<reason> form.
                    if "reasons" in parsed:
                        valid_reasons = []
                        for reason in parsed["reasons"]:
                            if reason in all_possible_categories or reason.startswith("UNKNOWN-"):
                                valid_reasons.append(reason)
                            else:
                                print(f"  ⚠️ Converting invalid category '{reason}' to UNKNOWN format")
                                valid_reasons.append(f"UNKNOWN-{reason}")
                        parsed["reasons"] = valid_reasons
                    
                    run_timestamp = datetime.now().isoformat()
                    model_version = extract_model_version(MODEL, result.stdout)
                    
                    # confidence_score is a fixed heuristic: 1.0 when no reason
                    # contains "UNKNOWN", otherwise 0.5. pixel_diff_accuracy is
                    # 1 - |computed - reported| (a missing report counts as 0).
                    parsed["_metadata"] = {
                        "test_directory": dir_name,
                        "timestamp": run_timestamp,
                        "model": MODEL,
                        "model_version": model_version,
                        "possible_categories": categories,
                        "all_possible_categories": all_possible_categories,
                        "analysis_mode": mode,
                        "run_id": f"{MODEL.replace(':', '_')}_{run_timestamp}",
                        "ground_truth": {
                            "pixel_difference": actual_pixel_diff
                        },
                        "performance": {
                            "response_time_seconds": response_time,
                            "confidence_score": 1.0 if not any("UNKNOWN" in r for r in parsed.get("reasons", [])) else 0.5,
                            "has_unknown_categories": any("UNKNOWN" in r for r in parsed.get("reasons", [])),
                            "pixel_diff_accuracy": 1.0 - abs(actual_pixel_diff - parsed.get("pixel_diff", 0)) if actual_pixel_diff is not None else None
                        }
                    }
                    
                    if mode in ["ignore_from_analysis", "ignore_from_metadata"]:
                        parsed["_metadata"]["ignored_reason"] = ignore_reason
                    
                    if mode == "ignore_from_analysis" and original_analysis:
                        parsed["_metadata"]["original_analysis_id"] = original_analysis.get("_metadata", {}).get("run_id", "unknown")
                    
                    # In these modes the model itself decides what to leave out,
                    # so the recorded ignored reason is inferred, not known.
                    if mode in ["analyze_and_ignore", "_analyze_with_fallback"]:
                        if mode == "analyze_and_ignore":
                            if parsed.get("reasons") and has_single_category:
                                parsed["_metadata"]["ignored_reason"] = categories[0]
                            else:
                                parsed["_metadata"]["ignored_reason"] = "First category detected by model (inferred)"
                        else:
                            parsed["_metadata"]["ignored_reason"] = "None (single category fallback)"
                            parsed["_metadata"]["analysis_mode"] = "analyze_and_ignore"
                    
                    # Plain "analyze" runs are stored under the "standard" file prefix.
                    if mode == "analyze":
                        filepath = append_to_analysis_file(dir_name, parsed, "standard", MODEL)
                    else:
                        filepath = append_to_analysis_file(dir_name, parsed, mode, MODEL)
                    
                    print(f"  ✅ Analysis complete!")
                    
                    print("\n  📋 Analysis Summary:")
                    print(f"  Pixel Difference: {parsed.get('pixel_diff', 'N/A')}")
                    print(f"  Semantic Difference: {parsed.get('semantic_diff', 'N/A')}")
                    
                    if mode in ["ignore_from_analysis", "ignore_from_metadata", "analyze_and_ignore", "_analyze_with_fallback"]:
                        if ignore_reason:
                            ignore_text = f"After ignoring '{ignore_reason}'"
                        elif mode == "_analyze_with_fallback":
                            ignore_text = "Single category fallback analysis"
                        else:
                            ignore_text = "After ignoring first detected category"
                        print(f"  Remaining Failure Reasons ({ignore_text}): {', '.join(parsed.get('reasons', ['None']))}")
                    else:
                        print(f"  Failure Reasons: {', '.join(parsed.get('reasons', ['Unknown']))}")
                    
                    print("\n  Full Analysis:")
                    print(prettify_json(parsed))
                    
                    return parsed
                else:
                    # No braces found: keep the raw reply next to the test files.
                    print(f"  ❌ Could not extract JSON from output for {dir_name}")
                    fallback_path = os.path.join(test_dir, f"raw_output_{mode}.txt")
                    with open(fallback_path, 'w') as f:
                        f.write(output)
                    print(f"  📄 Saved raw output to {fallback_path}")
                    return None
            except json.JSONDecodeError:
                # Braces found but the enclosed text is not valid JSON.
                print(f"  ❌ Invalid JSON response for {dir_name}")
                fallback_path = os.path.join(test_dir, f"raw_output_{mode}.txt")
                with open(fallback_path, 'w') as f:
                    f.write(result.stdout)
                print(f"  📄 Saved raw output to {fallback_path}")
                return None
        except subprocess.CalledProcessError as e:
            print(f"  ❌ Error running Ollama for {dir_name}: {e}")
            print(f"  Error output: {e.stderr}")
            return None
        except Exception as e:
            # Catch-all so a single failing test case does not abort a batch run.
            print(f"  ❌ Unexpected error during analysis for {dir_name}: {e}")
            return None
    finally:
        # Always restore the caller's working directory.
        os.chdir(current_dir)

def process_test_directory(test_dir):
    """Run the plain "analyze" mode on *test_dir* and return its result."""
    analysis = process_test_with_mode(test_dir, "analyze")
    return analysis

def process_ignore_reason_analysis(test_dir, original_analysis):
    """Re-analyze *test_dir* in "ignore_from_analysis" mode using *original_analysis*."""
    analysis = process_test_with_mode(
        test_dir,
        "ignore_from_analysis",
        original_analysis,
    )
    return analysis

def run_standard_analysis(debug_mode=False):
    """Run the standard analysis over every test directory in DATASET_DIR.

    Args:
        debug_mode: When true, only the first two directories are processed.

    Returns:
        Dict mapping directory name to its analysis, containing only the tests
        that produced a result; ``{}`` when the dataset is missing or empty.
    """
    if not os.path.exists(DATASET_DIR):
        print(f"❌ Dataset directory not found: {DATASET_DIR}")
        return {}

    case_dirs = list(filter(os.path.isdir, glob.glob(os.path.join(DATASET_DIR, "*"))))
    if len(case_dirs) == 0:
        print("❌ No test directories found in the dataset folder.")
        return {}

    if debug_mode:
        case_dirs = case_dirs[:2]
        print(f"  🐞 DEBUG MODE: Processing only {len(case_dirs)} test directories")

    print(f"🔍 Found {len(case_dirs)} test directories.")
    print(f"📊 Results will be saved to individual files in: {OUTPUT_DIR}")

    collected = {}
    for case_dir in case_dirs:
        outcome = process_test_directory(case_dir)
        if not outcome:
            # Skipped or failed test cases are left out of the summary.
            continue
        collected[os.path.basename(case_dir)] = outcome

    print("\n✅ Completed processing all test directories.")
    return collected

def run_ignore_reason_analysis(original_results, mode="ignore_from_analysis", debug_mode=False):
    """Run one of the "ignore" style analyses for every entry of *original_results*.

    Args:
        original_results: Dict keyed by test directory name. For
            "ignore_from_analysis" each value is an earlier analysis (or a
            list of them, in which case the one with the greatest timestamp is
            used). For "ignore_from_metadata" and "analyze_and_ignore" only
            the keys are used.
        mode: Analysis mode passed on to process_test_with_mode().
        debug_mode: When true, the two key-only modes process just the first
            two entries.

    Returns:
        Dict mapping directory name to the new analysis for every test that
        produced a result.
    """
    # These modes need no earlier analysis, only the list of directories.
    needs_no_original = mode in ("ignore_from_metadata", "analyze_and_ignore")

    if not (original_results or needs_no_original):
        print(f"❌ No original analysis results found. Please run standard analysis first.")
        return {}

    print(f"🔍 Running '{mode}' analysis with model {MODEL}...")

    collected = {}

    if debug_mode and needs_no_original and isinstance(original_results, dict):
        original_results = dict(list(original_results.items())[:2])
        print(f"  🐞 DEBUG MODE: Processing only {len(original_results)} test directories")

    for dir_name, analyses in original_results.items():
        test_dir = os.path.join(DATASET_DIR, dir_name)
        if not os.path.exists(test_dir):
            print(f"❌ Test directory not found: {test_dir}")
            continue

        if needs_no_original:
            outcome = process_test_with_mode(test_dir, mode=mode)
        else:
            if isinstance(analyses, list):
                # Several analyses stored for this test: use the newest one.
                newest = None
                newest_stamp = None
                for candidate in analyses:
                    stamp = candidate.get("_metadata", {}).get("timestamp")
                    if not stamp:
                        continue
                    if newest_stamp is None or stamp > newest_stamp:
                        newest, newest_stamp = candidate, stamp

                if not newest:
                    print(f"  ❌ No valid analysis found for {dir_name}, skipping.")
                    continue
                source_analysis = newest
            else:
                source_analysis = analyses

            outcome = process_test_with_mode(test_dir, mode=mode, original_analysis=source_analysis)

        if outcome:
            collected[dir_name] = outcome

    print(f"\n✅ Completed processing all '{mode}' analyses with model {MODEL}.")
    return collected

def select_model():
    """Show the model list and prompt until a valid choice is made.

    Returns:
        The chosen entry of AVAILABLE_MODELS, or None when the user enters
        'r' to go back.
    """
    rule = "="*80

    clear_screen()
    print("\n" + rule)
    print(" "*30 + "MODEL SELECTION")
    print(rule + "\n")
    print("📋 Available Models:\n")

    for position, name in enumerate(AVAILABLE_MODELS):
        print(f"{position}. {name}")

    print("\nr. Return to main menu\n")
    print(rule)

    while True:
        answer = input("\nEnter your choice: ")

        if answer.lower() == 'r':
            return None

        try:
            picked = int(answer)
        except ValueError:
            print("❌ Invalid choice. Please enter a valid number.")
            continue

        if 0 <= picked < len(AVAILABLE_MODELS):
            return AVAILABLE_MODELS[picked]
        print(f"❌ Invalid choice. Please enter a number between 0 and {len(AVAILABLE_MODELS)-1}.")

def print_menu():
    """Clear the screen, print the main menu and return the user's raw input."""
    rule = "="*80
    menu_lines = [
        "\n" + rule,
        " "*30 + "SNAPSHOT ANALYSIS MENU",
        rule + "\n",
        f"Current model: {MODEL}",
        "\n📋 Available Options:\n",
        "0. Select Model",
        "   - Choose which LLM to use for analysis\n",
        "1. Run Standard Analysis",
        "   - Analyze all snapshots in the dataset directory",
        "   - Create/update analysis.json files with failure reasons\n",
        "2. Run 'Ignore Reason' Analysis (From Analysis)",
        "   - For each test, take the first reason from the standard analysis",
        "   - Run a new analysis ignoring that reason\n",
        "3. Run 'Ignore Reason' Analysis (From Metadata)",
        "   - For each test, take the first category from metadata",
        "   - Run a new analysis ignoring that category\n",
        "4. Run 'Analyze and Ignore' Analysis",
        "   - Run a single analysis that detects categories and ignores the first one",
        "   - Process in one prompt rather than separate analyses\n",
        "q. Quit\n",
        rule,
    ]

    clear_screen()
    # One print() per entry keeps the line breaks exactly as listed above.
    for line in menu_lines:
        print(line)
    return input("\nEnter your choice (0-4, or q): ")

def _dataset_test_dir_names():
    """Return a ``{directory name: None}`` dict for every sub-directory of DATASET_DIR.

    The ``None`` values are placeholders: for the "ignore_from_metadata" and
    "analyze_and_ignore" modes run_ignore_reason_analysis() only uses the keys.
    """
    return {
        os.path.basename(d): None
        for d in glob.glob(os.path.join(DATASET_DIR, "*"))
        if os.path.isdir(d)
    }


def main():
    """Command-line entry point.

    With ``--all`` every analysis mode is run for each model in
    AVAILABLE_MODELS (models whose tag contains "4b" first), then the report
    generator is invoked if it can be imported. ``--debug`` limits those runs
    to the first two test directories. Without ``--all`` an interactive menu
    loop is shown until the user quits.

    The module-level MODEL is rebound while running; in ``--all`` mode it is
    set back to its initial value afterwards.
    """
    global MODEL
    
    parser = argparse.ArgumentParser(description='Snapshot Analysis Tool')
    parser.add_argument('--all', action='store_true', help='Run all analysis modes with both 4b and 12b models')
    parser.add_argument('--debug', action='store_true', help='Process only the first 2 test directories for quick debugging')
    args = parser.parse_args()
    
    original_model = MODEL
    
    if args.all:
        print("\n💡 Running all analysis modes with both models...")
        
        models_to_use = [m for m in AVAILABLE_MODELS if "4b" in m] + [m for m in AVAILABLE_MODELS if "4b" not in m]
        print(f"📊 Will run analyses using models in order: {', '.join(models_to_use)}")
        
        for model_to_use in models_to_use:
            MODEL = model_to_use
            print(f"\n{'='*80}\nRunning all analyses with model: {MODEL}\n{'='*80}")
            
            print("\n🔎 Running Standard Analysis (Option 1)...")
            original_results = run_standard_analysis(debug_mode=args.debug)
            
            print("\n🔎 Running 'Ignore Reason' Analysis (From Analysis) (Option 2)...")
            if original_results:
                run_ignore_reason_analysis(original_results, mode="ignore_from_analysis", debug_mode=args.debug)
            else:
                # Nothing produced in this run: fall back to the newest saved
                # standard analysis for this model, if there is one.
                latest_results = load_latest_analysis("standard", MODEL)
                if latest_results:
                    print(f"  📊 Using latest 'standard' analysis results for model {MODEL}")
                    run_ignore_reason_analysis(latest_results, mode="ignore_from_analysis", debug_mode=args.debug)
                else:
                    print(f"❌ No standard analysis results available for model {MODEL}. Skipping option 2.")
            
            print("\n🔎 Running 'Ignore Reason' Analysis (From Metadata) (Option 3)...")
            test_dirs = _dataset_test_dir_names()
            if args.debug:
                test_dirs = dict(list(test_dirs.items())[:2])
                print(f"  🐞 DEBUG MODE: Processing only {len(test_dirs)} test directories")
            run_ignore_reason_analysis(test_dirs, mode="ignore_from_metadata", debug_mode=args.debug)
            
            print("\n🔎 Running 'Analyze and Ignore' Analysis (Option 4)...")
            run_ignore_reason_analysis(test_dirs, mode="analyze_and_ignore", debug_mode=args.debug)
        
        MODEL = original_model
        
        print("\n✅ All analyses completed! Generating final report...")
        try:
            import generate_report
            generate_report.generate_report()
        except ImportError:
            print("\n⚠️ Could not import generate_report module. Please run 'python generate_report.py' manually.")
        
        print("\n🏁 All done!")
        return
    
    # Standard-analysis results of the current session, reused by option 2.
    original_results = {}
    
    while True:
        choice = print_menu()
        
        if choice == '0':
            selected_model = select_model()
            if selected_model:
                if selected_model != MODEL:
                    # Cached results were produced by the previous model. Drop
                    # them so option 2 loads (or asks for) results that belong
                    # to the newly selected model instead of mixing models.
                    original_results = {}
                MODEL = selected_model
                print(f"\nModel changed to: {MODEL}")
            input("\nPress Enter to continue...")
            
        elif choice == '1':
            print("\nRunning Standard Analysis...")
            original_results = run_standard_analysis()
            input("\nPress Enter to continue...")
            
        elif choice == '2':
            print("\nRunning 'Ignore Reason' Analysis (From Analysis)...")
            if not original_results:
                # No in-session results: try the newest saved file for this model.
                latest_file = find_latest_analysis_file("standard", MODEL)
                if latest_file:
                    try:
                        with open(latest_file, 'r') as f:
                            original_results = json.load(f)
                        print(f"\n📊 Loaded latest standard analysis from {latest_file}")
                    except Exception as e:
                        print(f"\n❌ Error reading analysis results: {e}")
                        input("\nPress Enter to continue...")
                        continue
                else:
                    print(f"\n❌ No standard analysis results available for model {MODEL}. Please run option 1 first.")
                    input("\nPress Enter to continue...")
                    continue
                    
            run_ignore_reason_analysis(original_results, mode="ignore_from_analysis")
            input("\nPress Enter to continue...")

        elif choice == '3':
            print("\nRunning 'Ignore Reason' Analysis (From Metadata)...")
            if not os.path.exists(DATASET_DIR):
                print(f"\n❌ Dataset directory not found: {DATASET_DIR}")
                input("\nPress Enter to continue...")
                continue
                
            test_dirs = _dataset_test_dir_names()
            run_ignore_reason_analysis(test_dirs, mode="ignore_from_metadata")
            input("\nPress Enter to continue...")
            
        elif choice == '4':
            print("\nRunning 'Analyze and Ignore' Analysis...")
            if not os.path.exists(DATASET_DIR):
                print(f"\n❌ Dataset directory not found: {DATASET_DIR}")
                input("\nPress Enter to continue...")
                continue
                
            test_dirs = _dataset_test_dir_names()
            run_ignore_reason_analysis(test_dirs, mode="analyze_and_ignore")
            input("\nPress Enter to continue...")
            
        elif choice.lower() == 'q':
            print("\nExiting. Goodbye!")
            break
            
        else:
            print("\n❌ Invalid choice. Please try again.")
            input("\nPress Enter to continue...")

def extract_model_version(model_name, output):
    """Return the second whitespace-separated field of the first line naming the model.

    A line qualifies when it has at least two fields and contains *model_name*
    anywhere. Returns "unknown" when no line qualifies or when the inputs
    cannot be processed.
    """
    try:
        fields_per_line = ((row, row.split()) for row in output.strip().split('\n'))
        return next(
            (fields[1] for row, fields in fields_per_line
             if len(fields) >= 2 and model_name in row),
            "unknown",
        )
    except Exception:
        # Any malformed input simply yields "unknown".
        return "unknown"

# Script entry point: run main() and turn Ctrl-C into a clean exit (status 0)
# with a short message instead of a traceback.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\nProgram interrupted by user. Exiting...")
        sys.exit(0) 