"""
QUEVEDO ENGINE v5.0 - UPGRADED WITH STEVEN'S MASTER LEXICON
============================================================
Simulates with the lexicon transcribed from voynich_x_master_db.json; the
tables are embedded in this module and the JSON file is not read at runtime.
"""

import json
import random
import statistics
from collections import Counter
from dataclasses import dataclass
from typing import Dict, Iterable, List, Optional, Tuple

# --- STEVEN'S MASTER LEXICON (from voynich_x_master_db.json) ---

# Roots glossed as substances, vessels and energy sources.  Each heading
# comment is the working gloss for the root(s) listed directly beneath it.
MATERIAL_ROOTS = [
    # Fluid/Solvent
    "CHOL",
    "QOL",
    # Solid/Biomass
    "SHEDY",
    "SHE",
    # Container/Reactor
    "OR",
    "OL",
    # Heat/Energy
    "KAL",
    "K",
    # Earth/Sediment
    "DOR",
    "DAR",
    # Vapor/Gas
    "AIR",
    "AR",
    # Tool/Stopper
    "CPHY",
    # Concentrate/Essence
    "CHAR",
    "CHOR",
    # Resin/Crystal
    "CHCTH",
    "CHOCKH",
    # Raw material
    "AIIS",
    # Syrup/Extract
    "SHOD",
    # Energy/Power
    "KAR",
]

# Roots glossed as actions or process steps.
OPERATIONAL_ROOTS = [
    # Mix/Agitate
    "DAIIN",
    "DAI",
    # Timer/Cycle
    "OK",
    "OKEY",
    # Separate/Decant
    "TAL",
    "TOL",
    # Force/Seal
    "KEE",
    "CHE",
    # Intensify
    "EEO",
    # Conduct/Channel
    "CTH",
    # Stopper/Seal
    "PCH",
    "CPH",
    # Crystallization
    "SHOCKH",
    # Refined protocol
    "SHEKS",
]

# Full root pool: material roots first, then operational roots.
ALL_ROOTS = [*MATERIAL_ROOTS, *OPERATIONAL_ROOTS]

# Affix tables.  Each one ends with "" so that "no affix" is a valid draw.
PREFIXES = [
    "QO",
    "D",
    "Y",
    "S",
    "OT",
    "F",
    "P",
    "YT",
    "O",
    "",
]
SUFFIXES = [
    "AL",
    "DY",
    "IN",
    "R",
    "AM",
    "ETY",
    "Y",
    "EY",
    "AIN",
    "OL",
    "",
]
INFIXES = [
    "e",
    "ai",
    "o",
    "ee",
    "",
]

# Gallow modes keyed by their single-letter code.  "root_bias" is the root
# pool preferred while the mode is active; mode 'P' reuses the MATERIAL_ROOTS
# list object itself rather than a copy.
GALLOW_MODES = {
    'P': dict(
        description="Logistics/Pressure",
        root_bias=MATERIAL_ROOTS,
    ),
    'F': dict(
        description="Thermal/Furnace",
        root_bias=["CHOL", "CHOR", "KAL", "KAR", "SHOD"],
    ),
    'T': dict(
        description="Temporal/Cycling",
        root_bias=["OK", "OKEY", "TAL", "TOL", "DAIIN"],
    ),
    'Q': dict(
        description="Chemical/Reagent",
        root_bias=["CHCTH", "CPHY", "PCH", "SHOCKH"],
    ),
    'K': dict(
        description="Critical Control",
        root_bias=["KEE", "CTH", "SHEKS", "KAR"],
    ),
}


class QuevedoEngineV5:
    """Random word generator driven by the module-level lexicon tables.

    Words are assembled as prefix + root + infix + suffix.  The root pool is
    biased by the active gallow mode, and the suffix is fixed by a ring index
    that only advances when engage_gallow() trips its ratchet.  All
    randomness comes from the module-level ``random`` functions.

    Instance state:
        _current_suffix_idx: position in SUFFIXES applied to every new word.
        _ratchet_counter: engage_gallow() calls since the ring last advanced.
        _active_mode: the mode key stored by the latest engage_gallow() call.
        _brake_engaged: False only right after a call that advanced the
            ring; nothing in this class reads it.
    """

    def __init__(self):
        self._current_suffix_idx = 0
        self._ratchet_counter = 0
        self._active_mode = 'P'
        self._brake_engaged = True

    def _build_word(self, mode: str) -> str:
        """Generate one upper-cased word as prefix + root + infix + suffix.

        Args:
            mode: Gallow mode key; an unknown key uses the 'P' root pool.

        Returns:
            The assembled word, with doubled letters merged where two
            morphemes meet, converted to upper case.
        """
        mode_roots = GALLOW_MODES.get(mode, GALLOW_MODES['P'])['root_bias']
        # 70% of roots come from the mode's preferred pool, the remainder
        # from the whole lexicon.
        if random.random() < 0.7:
            root = random.choice(mode_roots)
        else:
            root = random.choice(ALL_ROOTS)

        # A prefix is attempted 60% of the time and an infix 30% of the
        # time; either draw may still land on the empty entry of its table.
        prefix = random.choice(PREFIXES) if random.random() < 0.6 else ""
        infix = random.choice(INFIXES) if random.random() < 0.3 else ""

        # The suffix is not random: it is whatever the ring currently holds.
        suffix = SUFFIXES[self._current_suffix_idx % len(SUFFIXES)]

        # Offsets at which the root, infix and suffix start.  Elision is
        # restricted to these seams so that doubled letters belonging to a
        # root (DAIIN, AIIS, KEE, EEO) survive intact.
        root_start = len(prefix)
        infix_start = root_start + len(root)
        suffix_start = infix_start + len(infix)
        word = self._apply_elision(
            prefix + root + infix + suffix,
            (root_start, infix_start, suffix_start),
        )

        return word.upper()

    def _apply_elision(
        self,
        word: str,
        junctions: Optional[Iterable[int]] = None,
    ) -> str:
        """Merge a doubled letter where two morphemes meet.

        Args:
            word: The raw concatenation of the morphemes.
            junctions: Offsets in ``word`` at which a new morpheme starts.
                Offsets of 0 or ``len(word)`` are ignored because nothing
                meets there.  When omitted, the older whole-word scan
                described below is used instead.

        Returns:
            With ``junctions``: ``word`` minus every character that sits at
            a junction and equals (ignoring case) the character kept just
            before it, e.g. "OKEY" + "Y" -> "OKEY" and "D" + "DOR" -> "DOR".

            Without ``junctions``: words shorter than four characters are
            returned unchanged; otherwise the first character in positions
            2 .. len(word) - 2 that repeats its predecessor is removed.
            That scan ignores morpheme boundaries, so it shortens roots such
            as DAIIN and skips doubles touching either end of the word; it
            is kept only so existing one-argument calls behave as before.
        """
        if junctions is None:
            if len(word) < 4:
                return word
            for i in range(2, len(word) - 1):
                if word[i].lower() == word[i - 1].lower():
                    return word[:i] + word[i + 1:]
            return word

        seams = {pos for pos in junctions if 0 < pos < len(word)}
        kept: List[str] = []
        for pos, char in enumerate(word):
            # Compare against the last character actually kept so a
            # one-letter morpheme that was dropped cannot hide a double.
            if pos in seams and kept and char.lower() == kept[-1].lower():
                continue
            kept.append(char)
        return "".join(kept)

    def engage_gallow(self, gallow_type: str) -> str:
        """Switch the active mode and step the suffix ratchet.

        Args:
            gallow_type: Mode key; anything not in GALLOW_MODES becomes 'P'.

        Returns:
            The bracketed marker for the mode actually engaged, e.g. "[P]".
        """
        if gallow_type not in GALLOW_MODES:
            gallow_type = 'P'

        self._active_mode = gallow_type
        self._ratchet_counter += 1

        # A fresh threshold between 2 and 4 is drawn on every call; once the
        # counter reaches it the suffix ring moves one step and the counter
        # starts over.
        if self._ratchet_counter >= random.randint(2, 4):
            self._current_suffix_idx = (self._current_suffix_idx + 1) % len(SUFFIXES)
            self._ratchet_counter = 0
            self._brake_engaged = False
        else:
            self._brake_engaged = True

        return f"[{gallow_type}]"

    def simulate_shift(self, schedule: List[Tuple[str, int]]) -> str:
        """Generate the text for a sequence of (mode, word count) segments.

        Args:
            schedule: Pairs of mode key and number of words to generate
                while that mode is active.

        Returns:
            One space-separated string in which each segment contributes
            its gallow marker followed by its words.
        """
        output = []

        for mode, cycles in schedule:
            output.append(self.engage_gallow(mode))
            for _ in range(cycles):
                output.append(self._build_word(mode))

        return " ".join(output)


def calculate_jaccard(line1: List[str], line2: List[str]) -> float:
    """Return the Jaccard similarity of two token lines.

    Tokens are compared case-insensitively, and any token that begins with
    '[' is left out of the comparison.  The result is the number of shared
    tokens divided by the number of distinct tokens across both lines, or
    0.0 when neither line has any token left to compare.
    """
    def vocabulary(tokens):
        return {token.lower() for token in tokens if not token.startswith('[')}

    left = vocabulary(line1)
    right = vocabulary(line2)
    combined = left | right
    if not combined:
        return 0.0
    return len(left & right) / len(combined)


# Reference statistics for the real Voynich text that the simulation is
# scored against (the values the report previously hard-coded in place).
_REAL_VOYNICH_AVG_JACCARD = 0.0226
_REAL_VOYNICH_MEDIAN_JACCARD = 0.0


def _is_gallow_marker(token: str) -> bool:
    """Return True for bracketed mode markers such as "[P]"."""
    return token.startswith('[')


def _generate_corpus(engine, target_words: int):
    """Run random shifts until at least ``target_words`` words exist.

    Returns:
        ``(words, lines)``.  ``words`` holds the generated words without
        gallow markers.  ``lines`` holds chunks of 8-12 consecutive tokens
        (markers left in place, since calculate_jaccard skips them); chunks
        of three tokens or fewer are discarded.
    """
    modes = list(GALLOW_MODES.keys())
    all_words = []
    all_lines = []

    cycle = 0
    while len(all_words) < target_words:
        schedule = [(random.choice(modes), random.randint(5, 15)) for _ in range(random.randint(3, 6))]
        tokens = engine.simulate_shift(schedule).split()
        # Markers are bookkeeping, not vocabulary, so they are kept out of
        # the word counts.
        all_words.extend(tok for tok in tokens if not _is_gallow_marker(tok))

        # One line width is drawn per shift and applied to all its tokens.
        line_size = random.randint(8, 12)
        for start in range(0, len(tokens), line_size):
            line = tokens[start:start + line_size]
            if len(line) > 3:
                all_lines.append(line)

        cycle += 1
        if cycle % 100 == 0:
            print(f"  Progress: {len(all_words):,} words...")

    return all_words, all_lines


def _classify_match(avg_jaccard: float, zero_pct: float):
    """Map the headline statistics to a ``(verdict, symbol)`` pair."""
    if avg_jaccard < 0.05 and zero_pct > 40:
        return "VALIDATED", "✅"
    if avg_jaccard < 0.10:
        return "PARTIAL", "⚠️"
    return "NEEDS_TUNING", "🔧"


def run_upgraded_stress_test(
    target_words: int = 100000,
    output_path: str = "quevedo_v5_stress_test_results.json",
    seed: Optional[int] = None,
):
    """Generate a corpus, compare its line overlap to the reference, report.

    The engine produces random shifts until ``target_words`` words exist.
    Consecutive lines are scored with calculate_jaccard, the summary is
    printed, and the same figures are written to ``output_path`` as JSON.

    Args:
        target_words: Minimum number of words to generate, not counting
            gallow markers.
        output_path: Destination of the JSON report; overwritten if present.
        seed: When given, seeds the module-level ``random`` generator first
            so the run can be repeated exactly.

    Returns:
        The dictionary that was written to ``output_path``.

    Raises:
        ValueError: If ``target_words`` is smaller than 1.
    """
    if target_words < 1:
        raise ValueError("target_words must be at least 1")
    if seed is not None:
        random.seed(seed)

    print("=" * 70)
    print("QUEVEDO ENGINE v5.0 - UPGRADED WITH STEVEN'S MASTER LEXICON")
    print("=" * 70)

    engine = QuevedoEngineV5()

    print(f"\n[GENERATING] Target: {target_words:,} words...")
    all_words, all_lines = _generate_corpus(engine, target_words)
    unique_word_count = len(set(all_words))

    print("\n[GENERATED DATA]")
    print(f"  Total words:      {len(all_words):,}")
    print(f"  Unique words:     {unique_word_count:,}")
    print(f"  Total lines:      {len(all_lines):,}")

    # Similarity of every line to the line immediately before it.
    jaccard_scores = [
        calculate_jaccard(previous, current)
        for previous, current in zip(all_lines, all_lines[1:])
    ]

    avg_jaccard = statistics.mean(jaccard_scores)
    med_jaccard = statistics.median(jaccard_scores)
    # stdev() needs at least two values; a very small run may yield one.
    std_jaccard = statistics.stdev(jaccard_scores) if len(jaccard_scores) > 1 else 0.0
    zero_pct = sum(1 for j in jaccard_scores if j == 0.0) / len(jaccard_scores) * 100

    print("\n[JACCARD ANALYSIS]")
    print("  UPGRADED ENGINE:")
    print(f"    Average Jaccard:  {avg_jaccard:.4f}")
    print(f"    Median Jaccard:   {med_jaccard:.4f}")
    print(f"    Std Dev:          {std_jaccard:.4f}")
    print(f"    Zero-overlap %:   {zero_pct:.1f}%")

    print("\n  REAL VOYNICH:")
    print(f"    Average Jaccard:  {_REAL_VOYNICH_AVG_JACCARD:.4f}")
    print(f"    Median Jaccard:   {_REAL_VOYNICH_MEDIAN_JACCARD:.4f}")

    # Relative distance of the simulated average from the reference average,
    # expressed as a percentage and clamped to 0..100.
    relative_error = abs(avg_jaccard - _REAL_VOYNICH_AVG_JACCARD) / _REAL_VOYNICH_AVG_JACCARD
    match_score = max(0, min(100, 100 - relative_error * 100))

    verdict, symbol = _classify_match(avg_jaccard, zero_pct)

    print(f"\n  MATCH SCORE:        {match_score:.1f}%")
    print(f"  VERDICT:            {symbol} {verdict}")

    results = {
        "engine_version": "5.0 - Steven's Master Lexicon",
        "test_config": {
            "total_words": len(all_words),
            "unique_words": unique_word_count,
            "total_lines": len(all_lines),
            "lexicon_roots": len(ALL_ROOTS),
            "lexicon_prefixes": len(PREFIXES),
            "lexicon_suffixes": len(SUFFIXES)
        },
        "jaccard_analysis": {
            "average": round(avg_jaccard, 6),
            "median": round(med_jaccard, 6),
            "std_dev": round(std_jaccard, 6),
            "zero_overlap_pct": round(zero_pct, 2)
        },
        "real_voynich_comparison": {
            "avg_jaccard": _REAL_VOYNICH_AVG_JACCARD,
            "median_jaccard": _REAL_VOYNICH_MEDIAN_JACCARD,
            "match_score_pct": round(match_score, 1)
        },
        "verdict": verdict
    }

    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    print(f"\n[RESULTS SAVED] {output_path}")
    print("=" * 70)

    return results


# Run the stress test only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    run_upgraded_stress_test()
