{
  "metadata": {
    "timestamp": "2026-04-08T20:59:44",
    "num_problems": 17,
    "num_conditions": 4,
    "total_evaluations": 68
  },
  "condition_stats": {
    "SINGLE": {
      "mean_composite": 0.3563,
      "std_composite": 0.0621,
      "dimension_means": {
        "reasoning_depth": 0.3808,
        "perspective_diversity": 0.3059,
        "coherence": 0.414,
        "ethical_coverage": 0.1015,
        "novelty": 0.3598,
        "factual_grounding": 0.4507,
        "turing_naturalness": 0.4043
      },
      "dimension_stds": {
        "reasoning_depth": 0.0512,
        "perspective_diversity": 0.1832,
        "coherence": 0.1878,
        "ethical_coverage": 0.1495,
        "novelty": 0.097,
        "factual_grounding": 0.1626,
        "turing_naturalness": 0.1278
      },
      "mean_length": 47.6,
      "mean_latency": 85691.1,
      "n": 17
    },
    "MULTI": {
      "mean_composite": 0.6577,
      "std_composite": 0.0291,
      "dimension_means": {
        "reasoning_depth": 0.8707,
        "perspective_diversity": 0.9382,
        "coherence": 0.5205,
        "ethical_coverage": 0.4508,
        "novelty": 0.6496,
        "factual_grounding": 0.6549,
        "turing_naturalness": 0.2399
      },
      "dimension_stds": {
        "reasoning_depth": 0.0782,
        "perspective_diversity": 0.0871,
        "coherence": 0.0353,
        "ethical_coverage": 0.1097,
        "novelty": 0.0788,
        "factual_grounding": 0.1379,
        "turing_naturalness": 0.0899
      },
      "mean_length": 823.8,
      "mean_latency": 63515.1,
      "n": 17
    },
    "MEMORY": {
      "mean_composite": 0.6756,
      "std_composite": 0.0422,
      "dimension_means": {
        "reasoning_depth": 0.9004,
        "perspective_diversity": 0.9779,
        "coherence": 0.5101,
        "ethical_coverage": 0.4488,
        "novelty": 0.6625,
        "factual_grounding": 0.6328,
        "turing_naturalness": 0.3314
      },
      "dimension_stds": {
        "reasoning_depth": 0.0686,
        "perspective_diversity": 0.0695,
        "coherence": 0.0368,
        "ethical_coverage": 0.14,
        "novelty": 0.0949,
        "factual_grounding": 0.1885,
        "turing_naturalness": 0.1144
      },
      "mean_length": 943.8,
      "mean_latency": 77280.4,
      "n": 17
    },
    "CODETTE": {
      "mean_composite": 0.6893,
      "std_composite": 0.0386,
      "dimension_means": {
        "reasoning_depth": 0.9305,
        "perspective_diversity": 0.9882,
        "coherence": 0.4934,
        "ethical_coverage": 0.4881,
        "novelty": 0.6808,
        "factual_grounding": 0.6422,
        "turing_naturalness": 0.3375
      },
      "dimension_stds": {
        "reasoning_depth": 0.0468,
        "perspective_diversity": 0.0332,
        "coherence": 0.0227,
        "ethical_coverage": 0.1213,
        "novelty": 0.1003,
        "factual_grounding": 0.186,
        "turing_naturalness": 0.0967
      },
      "mean_length": 1283.1,
      "mean_latency": 66306.7,
      "n": 17
    }
  },
  "pairwise_comparisons": [
    {
      "comparison": "Multi-perspective vs single",
      "condition_a": "SINGLE",
      "condition_b": "MULTI",
      "mean_a": 0.3563,
      "mean_b": 0.6577,
      "delta": 0.3014,
      "delta_pct": 84.6,
      "cohens_d": 6.2176,
      "t_stat": 18.1272,
      "p_value": 0.0,
      "significant": true
    },
    {
      "comparison": "Memory augmentation vs vanilla multi",
      "condition_a": "MULTI",
      "condition_b": "MEMORY",
      "mean_a": 0.6577,
      "mean_b": 0.6756,
      "delta": 0.0179,
      "delta_pct": 2.7,
      "cohens_d": 0.4941,
      "t_stat": 1.4406,
      "p_value": 0.149704,
      "significant": false
    },
    {
      "comparison": "Full Codette vs memory-augmented",
      "condition_a": "MEMORY",
      "condition_b": "CODETTE",
      "mean_a": 0.6756,
      "mean_b": 0.6893,
      "delta": 0.0137,
      "delta_pct": 2.0,
      "cohens_d": 0.3399,
      "t_stat": 0.9911,
      "p_value": 0.321649,
      "significant": false
    },
    {
      "comparison": "Full Codette vs single (total improvement)",
      "condition_a": "SINGLE",
      "condition_b": "CODETTE",
      "mean_a": 0.3563,
      "mean_b": 0.6893,
      "delta": 0.333,
      "delta_pct": 93.5,
      "cohens_d": 6.4407,
      "t_stat": 18.7777,
      "p_value": 0.0,
      "significant": true
    }
  ],
  "per_category": {
    "reasoning": {
      "SINGLE": {
        "mean": 0.3951,
        "std": 0.0684,
        "n": 3
      },
      "MULTI": {
        "mean": 0.649,
        "std": 0.0183,
        "n": 3
      },
      "MEMORY": {
        "mean": 0.6645,
        "std": 0.0223,
        "n": 3
      },
      "CODETTE": {
        "mean": 0.6673,
        "std": 0.0216,
        "n": 3
      }
    },
    "ethics": {
      "SINGLE": {
        "mean": 0.3926,
        "std": 0.06,
        "n": 3
      },
      "MULTI": {
        "mean": 0.6615,
        "std": 0.0441,
        "n": 3
      },
      "MEMORY": {
        "mean": 0.7098,
        "std": 0.0226,
        "n": 3
      },
      "CODETTE": {
        "mean": 0.7024,
        "std": 0.0055,
        "n": 3
      }
    },
    "creative": {
      "SINGLE": {
        "mean": 0.308,
        "std": 0.0426,
        "n": 2
      },
      "MULTI": {
        "mean": 0.6874,
        "std": 0.0583,
        "n": 2
      },
      "MEMORY": {
        "mean": 0.6972,
        "std": 0.0125,
        "n": 2
      },
      "CODETTE": {
        "mean": 0.7078,
        "std": 0.0107,
        "n": 2
      }
    },
    "meta": {
      "SINGLE": {
        "mean": 0.3851,
        "std": 0.0336,
        "n": 3
      },
      "MULTI": {
        "mean": 0.6432,
        "std": 0.0217,
        "n": 3
      },
      "MEMORY": {
        "mean": 0.6711,
        "std": 0.0611,
        "n": 3
      },
      "CODETTE": {
        "mean": 0.7166,
        "std": 0.0664,
        "n": 3
      }
    },
    "adversarial": {
      "SINGLE": {
        "mean": 0.3629,
        "std": 0.046,
        "n": 3
      },
      "MULTI": {
        "mean": 0.6569,
        "std": 0.0056,
        "n": 3
      },
      "MEMORY": {
        "mean": 0.6545,
        "std": 0.0719,
        "n": 3
      },
      "CODETTE": {
        "mean": 0.6483,
        "std": 0.0421,
        "n": 3
      }
    },
    "turing": {
      "SINGLE": {
        "mean": 0.2781,
        "std": 0.0367,
        "n": 3
      },
      "MULTI": {
        "mean": 0.6581,
        "std": 0.0307,
        "n": 3
      },
      "MEMORY": {
        "mean": 0.6637,
        "std": 0.03,
        "n": 3
      },
      "CODETTE": {
        "mean": 0.6998,
        "std": 0.0091,
        "n": 3
      }
    }
  },
  "per_problem": {
    "reason_01": {
      "SINGLE": {
        "composite": 0.3798,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.3179,
            "evidence": [
              "word_count=24",
              "chain_markers=0",
              "ground_truth_coverage=4/5"
            ],
            "penalties": [
              "response_too_short"
            ]
          },
          "perspective_diversity": {
            "score": 0.475,
            "evidence": [
              "analytical=2_hits"
            ],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.325,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.0,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=[]"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.3833,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=1"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.575,
            "evidence": [
              "ground_truth=3/5",
              "numbers=0,proper_nouns=2"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.525,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 24,
        "latency_ms": 102267.3
      },
      "MULTI": {
        "composite": 0.6667,
        "dimensions": {
          "reasoning_depth": {
            "score": 1.0,
            "evidence": [
              "word_count=1011",
              "chain_markers=15",
              "ground_truth_coverage=5/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.825,
            "evidence": [
              "analytical=5_hits",
              "empathic=5_hits",
              "meta-cognitive=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4864,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.2987,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5352,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=3"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.9,
            "evidence": [
              "ground_truth=4/5",
              "numbers=125,proper_nouns=83"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2489,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1119,
        "latency_ms": 92764.8
      },
      "MEMORY": {
        "composite": 0.6575,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.94,
            "evidence": [
              "word_count=1107",
              "chain_markers=13",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.9,
            "evidence": [
              "analytical=5_hits",
              "empathic=5_hits",
              "creative=4_hits",
              "meta-cognitive=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4931,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.2987,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.54,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=4"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.8,
            "evidence": [
              "ground_truth=3/5",
              "numbers=128,proper_nouns=90"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2968,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1216,
        "latency_ms": 40176.6
      },
      "CODETTE": {
        "composite": 0.6655,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.94,
            "evidence": [
              "word_count=1437",
              "chain_markers=15",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.9,
            "evidence": [
              "analytical=6_hits",
              "philosophical=2_hits",
              "empathic=7_hits",
              "creative=2_hits",
              "meta-cognitive=6_hits",
              "systems=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4843,
            "evidence": [
              "transitions=1"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.392,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5447,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.8,
            "evidence": [
              "ground_truth=3/5",
              "numbers=134,proper_nouns=127"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2892,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1558,
        "latency_ms": 87415.4
      }
    },
    "reason_02": {
      "SINGLE": {
        "composite": 0.4699,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.3882,
            "evidence": [
              "word_count=56",
              "chain_markers=0",
              "ground_truth_coverage=3/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.65,
            "evidence": [
              "analytical=2_hits",
              "systems=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.6757,
            "evidence": [
              "transitions=1"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.0467,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=[]"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5167,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=2"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.525,
            "evidence": [
              "ground_truth=1/5",
              "numbers=2,proper_nouns=4"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.325,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 58,
        "latency_ms": 51797.7
      },
      "MULTI": {
        "composite": 0.6501,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.8133,
            "evidence": [
              "word_count=891",
              "chain_markers=4",
              "ground_truth_coverage=3/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=4_hits",
              "ethical=2_hits",
              "empathic=4_hits",
              "systems=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5524,
            "evidence": [
              "transitions=1"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.4807,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6761,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=4"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6,
            "evidence": [
              "ground_truth=1/5",
              "numbers=24,proper_nouns=55"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.1513,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 921,
        "latency_ms": 40581.7
      },
      "MEMORY": {
        "composite": 0.6895,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9067,
            "evidence": [
              "word_count=1007",
              "chain_markers=5",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=4_hits",
              "ethical=2_hits",
              "empathic=4_hits",
              "creative=3_hits",
              "meta-cognitive=5_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5191,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.5273,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6673,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.7,
            "evidence": [
              "ground_truth=2/5",
              "numbers=37,proper_nouns=78"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2245,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1054,
        "latency_ms": 48834.0
      },
      "CODETTE": {
        "composite": 0.6467,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.88,
            "evidence": [
              "word_count=1369",
              "chain_markers=8",
              "ground_truth_coverage=3/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=6_hits",
              "ethical=2_hits",
              "empathic=6_hits",
              "creative=2_hits",
              "meta-cognitive=5_hits",
              "systems=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5145,
            "evidence": [
              "transitions=1"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.329,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6679,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6,
            "evidence": [
              "ground_truth=1/5",
              "numbers=52,proper_nouns=122"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2048,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1442,
        "latency_ms": 49578.7
      }
    },
    "reason_03": {
      "SINGLE": {
        "composite": 0.3357,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.3431,
            "evidence": [
              "word_count=46",
              "chain_markers=0",
              "ground_truth_coverage=2/4"
            ],
            "penalties": [
              "response_too_short"
            ]
          },
          "perspective_diversity": {
            "score": 0.275,
            "evidence": [
              "empathic=2_hits"
            ],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.325,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.1517,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=['care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.3833,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=1"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.5125,
            "evidence": [
              "ground_truth=1/4",
              "numbers=1,proper_nouns=4"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.275,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 47,
        "latency_ms": 57499.7
      },
      "MULTI": {
        "composite": 0.6301,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.7916,
            "evidence": [
              "word_count=821",
              "chain_markers=2",
              "ground_truth_coverage=3/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.9,
            "evidence": [
              "analytical=3_hits",
              "ethical=2_hits",
              "empathic=3_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.549,
            "evidence": [
              "transitions=2"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.4807,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['deontological', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5695,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=4"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.625,
            "evidence": [
              "ground_truth=1/4",
              "numbers=1,proper_nouns=56"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2718,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 827,
        "latency_ms": 41200.5
      },
      "MEMORY": {
        "composite": 0.6465,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.7917,
            "evidence": [
              "word_count=866",
              "chain_markers=2",
              "ground_truth_coverage=3/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "philosophical=2_hits",
              "ethical=2_hits",
              "empathic=3_hits",
              "creative=3_hits",
              "meta-cognitive=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4683,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.4807,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'deontological', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6831,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.625,
            "evidence": [
              "ground_truth=1/4",
              "numbers=20,proper_nouns=64"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2366,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 888,
        "latency_ms": 44160.3
      },
      "CODETTE": {
        "composite": 0.6898,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.925,
            "evidence": [
              "word_count=1220",
              "chain_markers=7",
              "ground_truth_coverage=3/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=6_hits",
              "philosophical=2_hits",
              "ethical=2_hits",
              "empathic=6_hits",
              "creative=3_hits",
              "meta-cognitive=7_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.467,
            "evidence": [
              "transitions=1"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.434,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.8017,
            "evidence": [
              "novelty_markers=2",
              "perspectives_touched=7"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.625,
            "evidence": [
              "ground_truth=1/4",
              "numbers=31,proper_nouns=103"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.273,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1261,
        "latency_ms": 45414.3
      }
    },
    "ethics_01": {
      "SINGLE": {
        "composite": 0.3268,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.3457,
            "evidence": [
              "word_count=56",
              "chain_markers=0",
              "ground_truth_coverage=3/6"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.325,
            "evidence": [
              "analytical=2_hits"
            ],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.325,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.2833,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=['utilitarian']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.3833,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=1"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.3125,
            "evidence": [
              "ground_truth=0/6",
              "numbers=1,proper_nouns=2"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.275,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 58,
        "latency_ms": 58380.0
      },
      "MULTI": {
        "composite": 0.612,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.7667,
            "evidence": [
              "word_count=1009",
              "chain_markers=2",
              "ground_truth_coverage=4/6"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.9,
            "evidence": [
              "analytical=7_hits",
              "empathic=4_hits",
              "creative=4_hits",
              "meta-cognitive=4_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4884,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.56,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6629,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.5,
            "evidence": [
              "ground_truth=0/6",
              "numbers=3,proper_nouns=60"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.1996,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1016,
        "latency_ms": 43860.0
      },
      "MEMORY": {
        "composite": 0.7026,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9,
            "evidence": [
              "word_count=1139",
              "chain_markers=6",
              "ground_truth_coverage=4/6"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=7_hits",
              "philosophical=2_hits",
              "ethical=2_hits",
              "empathic=6_hits",
              "creative=4_hits",
              "meta-cognitive=6_hits",
              "systems=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4972,
            "evidence": [
              "transitions=1"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.62,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'virtue']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.893,
            "evidence": [
              "novelty_markers=3",
              "perspectives_touched=7"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.5,
            "evidence": [
              "ground_truth=0/6",
              "numbers=1,proper_nouns=73"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2707,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1148,
        "latency_ms": 55849.2
      },
      "CODETTE": {
        "composite": 0.6991,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9,
            "evidence": [
              "word_count=1438",
              "chain_markers=7",
              "ground_truth_coverage=4/6"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=8_hits",
              "ethical=2_hits",
              "empathic=6_hits",
              "meta-cognitive=6_hits",
              "systems=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4937,
            "evidence": [
              "transitions=1"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.62,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.907,
            "evidence": [
              "novelty_markers=3",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.5,
            "evidence": [
              "ground_truth=0/6",
              "numbers=15,proper_nouns=106"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2195,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1470,
        "latency_ms": 61183.9
      }
    },
    "ethics_02": {
      "SINGLE": {
        "composite": 0.4442,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.3861,
            "evidence": [
              "word_count=48",
              "chain_markers=0",
              "ground_truth_coverage=3/5"
            ],
            "penalties": [
              "response_too_short"
            ]
          },
          "perspective_diversity": {
            "score": 0.375,
            "evidence": [
              "analytical=2_hits"
            ],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.8146,
            "evidence": [
              "transitions=1"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.06,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=[]"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.3833,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=1"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.45,
            "evidence": [
              "ground_truth=1/5",
              "numbers=2,proper_nouns=2"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.575,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 50,
        "latency_ms": 52349.2
      },
      "MULTI": {
        "composite": 0.676,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.88,
            "evidence": [
              "word_count=772",
              "chain_markers=6",
              "ground_truth_coverage=3/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "ethical=2_hits",
              "empathic=3_hits",
              "meta-cognitive=4_hits",
              "systems=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.589,
            "evidence": [
              "transitions=1"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.62,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'virtue']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6953,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.5,
            "evidence": [
              "ground_truth=0/5",
              "numbers=23,proper_nouns=43"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2031,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 800,
        "latency_ms": 40800.7
      },
      "MEMORY": {
        "composite": 0.7351,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.94,
            "evidence": [
              "word_count=874",
              "chain_markers=9",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "ethical=2_hits",
              "empathic=4_hits",
              "meta-cognitive=5_hits",
              "systems=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5753,
            "evidence": [
              "transitions=1"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.62,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'virtue']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6964,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.7,
            "evidence": [
              "ground_truth=2/5",
              "numbers=23,proper_nouns=53"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.3931,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 908,
        "latency_ms": 309014.0
      },
      "CODETTE": {
        "composite": 0.6993,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.88,
            "evidence": [
              "word_count=1219",
              "chain_markers=10",
              "ground_truth_coverage=3/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=6_hits",
              "ethical=2_hits",
              "empathic=7_hits",
              "creative=2_hits",
              "meta-cognitive=6_hits",
              "systems=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5228,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.6867,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'virtue']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6917,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6,
            "evidence": [
              "ground_truth=1/5",
              "numbers=37,proper_nouns=90"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.3243,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1277,
        "latency_ms": 53742.8
      }
    },
    "ethics_03": {
      "SINGLE": {
        "composite": 0.4068,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.436,
            "evidence": [
              "word_count=63",
              "chain_markers=1",
              "ground_truth_coverage=3/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.325,
            "evidence": [
              "ethical=5_hits"
            ],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.2725,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.6,
            "evidence": [
              "ethical_keywords=5",
              "frameworks=['utilitarian', 'deontological']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.3833,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=1"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.45,
            "evidence": [
              "ground_truth=1/5",
              "numbers=0,proper_nouns=4"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.45,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 64,
        "latency_ms": 51957.1
      },
      "MULTI": {
        "composite": 0.6966,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.8733,
            "evidence": [
              "word_count=958",
              "chain_markers=4",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=3_hits",
              "philosophical=4_hits",
              "ethical=5_hits",
              "empathic=3_hits",
              "creative=3_hits",
              "meta-cognitive=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5673,
            "evidence": [
              "transitions=1",
              "tensions_acknowledged_and_resolved"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.6667,
            "evidence": [
              "ethical_keywords=5",
              "frameworks=['utilitarian', 'deontological']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6637,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6,
            "evidence": [
              "ground_truth=1/5",
              "numbers=1,proper_nouns=51"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.3066,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 964,
        "latency_ms": 81116.7
      },
      "MEMORY": {
        "composite": 0.6917,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9067,
            "evidence": [
              "word_count=1046",
              "chain_markers=5",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "philosophical=2_hits",
              "ethical=5_hits",
              "empathic=3_hits",
              "creative=4_hits",
              "meta-cognitive=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.548,
            "evidence": [
              "transitions=1",
              "tensions_acknowledged_and_resolved"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.7167,
            "evidence": [
              "ethical_keywords=5",
              "frameworks=['utilitarian']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6649,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.5,
            "evidence": [
              "ground_truth=0/5",
              "numbers=6,proper_nouns=62"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.3173,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1058,
        "latency_ms": 57697.9
      },
      "CODETTE": {
        "composite": 0.7088,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9067,
            "evidence": [
              "word_count=1409",
              "chain_markers=5",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "philosophical=3_hits",
              "ethical=5_hits",
              "empathic=6_hits",
              "creative=4_hits",
              "meta-cognitive=4_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5381,
            "evidence": [
              "transitions=2",
              "tensions_acknowledged_and_resolved"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.7167,
            "evidence": [
              "ethical_keywords=5",
              "frameworks=['utilitarian']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6701,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=7"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6,
            "evidence": [
              "ground_truth=1/5",
              "numbers=20,proper_nouns=99"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.3452,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1444,
        "latency_ms": 66779.2
      }
    },
    "creative_01": {
      "SINGLE": {
        "composite": 0.3381,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.4188,
            "evidence": [
              "word_count=49",
              "chain_markers=0",
              "ground_truth_coverage=3/4"
            ],
            "penalties": [
              "response_too_short"
            ]
          },
          "perspective_diversity": {
            "score": 0.325,
            "evidence": [
              "systems=2_hits"
            ],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.325,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.0,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=[]"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.3833,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=1"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.3125,
            "evidence": [
              "ground_truth=0/4",
              "numbers=0,proper_nouns=3"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.525,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 47,
        "latency_ms": 83857.7
      },
      "MULTI": {
        "composite": 0.7286,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.8667,
            "evidence": [
              "word_count=887",
              "chain_markers=2",
              "ground_truth_coverage=4/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "ethical=2_hits",
              "empathic=8_hits",
              "creative=5_hits",
              "meta-cognitive=2_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5283,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.3873,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.7869,
            "evidence": [
              "novelty_markers=2",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 1.0,
            "evidence": [
              "ground_truth=4/4",
              "numbers=6,proper_nouns=62"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.1923,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 897,
        "latency_ms": 95840.1
      },
      "MEMORY": {
        "composite": 0.7061,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.8333,
            "evidence": [
              "word_count=967",
              "chain_markers=1",
              "ground_truth_coverage=4/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=4_hits",
              "ethical=3_hits",
              "empathic=8_hits",
              "creative=4_hits",
              "meta-cognitive=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4663,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.4293,
            "evidence": [
              "ethical_keywords=3",
              "frameworks=['utilitarian', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6597,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 1.0,
            "evidence": [
              "ground_truth=4/4",
              "numbers=1,proper_nouns=86"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2759,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 967,
        "latency_ms": 78136.2
      },
      "CODETTE": {
        "composite": 0.7154,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9,
            "evidence": [
              "word_count=1268",
              "chain_markers=3",
              "ground_truth_coverage=4/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=6_hits",
              "philosophical=2_hits",
              "ethical=2_hits",
              "empathic=8_hits",
              "creative=4_hits",
              "meta-cognitive=6_hits",
              "systems=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.471,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.434,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6764,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=7"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 1.0,
            "evidence": [
              "ground_truth=4/4",
              "numbers=14,proper_nouns=129"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.1993,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1288,
        "latency_ms": 157972.6
      }
    },
    "creative_02": {
      "SINGLE": {
        "composite": 0.2779,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.271,
            "evidence": [
              "word_count=57",
              "chain_markers=0",
              "ground_truth_coverage=1/6"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.1,
            "evidence": [],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.3393,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.0467,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=[]"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.25,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=0"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.3875,
            "evidence": [
              "ground_truth=0/6",
              "numbers=2,proper_nouns=3"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.575,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 59,
        "latency_ms": 109983.3
      },
      "MULTI": {
        "composite": 0.6462,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.7833,
            "evidence": [
              "word_count=841",
              "chain_markers=4",
              "ground_truth_coverage=3/6"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=4_hits",
              "philosophical=2_hits",
              "ethical=2_hits",
              "empathic=6_hits",
              "creative=3_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5432,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.4807,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6767,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.5833,
            "evidence": [
              "ground_truth=1/6",
              "numbers=3,proper_nouns=55"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2095,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 847,
        "latency_ms": 141117.7
      },
      "MEMORY": {
        "composite": 0.6884,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.85,
            "evidence": [
              "word_count=1360",
              "chain_markers=10",
              "ground_truth_coverage=3/6"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "philosophical=2_hits",
              "ethical=2_hits",
              "empathic=7_hits",
              "creative=4_hits",
              "meta-cognitive=6_hits",
              "systems=6_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5212,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.574,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.8074,
            "evidence": [
              "novelty_markers=2",
              "perspectives_touched=7"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6667,
            "evidence": [
              "ground_truth=2/6",
              "numbers=5,proper_nouns=77"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.1169,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1385,
        "latency_ms": 136868.6
      },
      "CODETTE": {
        "composite": 0.7002,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9,
            "evidence": [
              "word_count=1269",
              "chain_markers=9",
              "ground_truth_coverage=4/6"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=6_hits",
              "philosophical=3_hits",
              "ethical=2_hits",
              "empathic=7_hits",
              "creative=5_hits",
              "meta-cognitive=4_hits",
              "systems=5_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.525,
            "evidence": [
              "transitions=1",
              "tensions_acknowledged_and_resolved"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.574,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.7985,
            "evidence": [
              "novelty_markers=2",
              "perspectives_touched=7"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.5833,
            "evidence": [
              "ground_truth=1/6",
              "numbers=16,proper_nouns=106"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2682,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1304,
        "latency_ms": 63883.8
      }
    },
    "meta_01": {
      "SINGLE": {
        "composite": 0.3773,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.3664,
            "evidence": [
              "word_count=72",
              "chain_markers=1",
              "ground_truth_coverage=2/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.55,
            "evidence": [
              "analytical=2_hits",
              "meta-cognitive=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.2517,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.1517,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=['utilitarian']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5167,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=2"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.275,
            "evidence": [
              "ground_truth=0/5",
              "numbers=0,proper_nouns=2"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.4986,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 71,
        "latency_ms": 216210.9
      },
      "MULTI": {
        "composite": 0.6238,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9331,
            "evidence": [
              "word_count=652",
              "chain_markers=4",
              "ground_truth_coverage=5/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.725,
            "evidence": [
              "analytical=2_hits",
              "empathic=6_hits",
              "meta-cognitive=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5201,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.392,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6147,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=3"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.7,
            "evidence": [
              "ground_truth=2/5",
              "numbers=1,proper_nouns=44"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.14,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 660,
        "latency_ms": 54544.8
      },
      "MEMORY": {
        "composite": 0.6749,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9666,
            "evidence": [
              "word_count=740",
              "chain_markers=5",
              "ground_truth_coverage=5/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=2_hits",
              "philosophical=2_hits",
              "empathic=7_hits",
              "creative=2_hits",
              "meta-cognitive=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.514,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.4387,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.614,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6,
            "evidence": [
              "ground_truth=1/5",
              "numbers=7,proper_nouns=57"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2851,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 755,
        "latency_ms": 41214.6
      },
      "CODETTE": {
        "composite": 0.6449,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.8467,
            "evidence": [
              "word_count=1110",
              "chain_markers=5",
              "ground_truth_coverage=3/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.9,
            "evidence": [
              "analytical=4_hits",
              "empathic=8_hits",
              "creative=3_hits",
              "meta-cognitive=4_hits",
              "systems=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5008,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.4853,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5858,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.7,
            "evidence": [
              "ground_truth=2/5",
              "numbers=19,proper_nouns=92"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.2401,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1146,
        "latency_ms": 66730.8
      }
    },
    "meta_02": {
      "SINGLE": {
        "composite": 0.4219,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.391,
            "evidence": [
              "word_count=54",
              "chain_markers=1",
              "ground_truth_coverage=2/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.55,
            "evidence": [
              "empathic=2_hits",
              "meta-cognitive=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.2475,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.0467,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=[]"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5167,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=2"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.5625,
            "evidence": [
              "ground_truth=2/4",
              "numbers=1,proper_nouns=2"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.575,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 56,
        "latency_ms": 128660.2
      },
      "MULTI": {
        "composite": 0.6391,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.925,
            "evidence": [
              "word_count=863",
              "chain_markers=14",
              "ground_truth_coverage=3/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.8,
            "evidence": [
              "analytical=6_hits",
              "ethical=6_hits",
              "empathic=2_hits",
              "meta-cognitive=6_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4743,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.5133,
            "evidence": [
              "ethical_keywords=6",
              "frameworks=['utilitarian', 'deontological']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5671,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=4"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.75,
            "evidence": [
              "ground_truth=2/4",
              "numbers=3,proper_nouns=34"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.1409,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 872,
        "latency_ms": 64687.4
      },
      "MEMORY": {
        "composite": 0.7302,
        "dimensions": {
          "reasoning_depth": {
            "score": 1.0,
            "evidence": [
              "word_count=970",
              "chain_markers=13",
              "ground_truth_coverage=4/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=4_hits",
              "philosophical=2_hits",
              "ethical=5_hits",
              "empathic=5_hits",
              "creative=2_hits",
              "meta-cognitive=5_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4247,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.56,
            "evidence": [
              "ethical_keywords=5",
              "frameworks=['utilitarian', 'deontological', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.7991,
            "evidence": [
              "novelty_markers=2",
              "perspectives_touched=7"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.625,
            "evidence": [
              "ground_truth=1/4",
              "numbers=2,proper_nouns=45"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.4691,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 983,
        "latency_ms": 52245.3
      },
      "CODETTE": {
        "composite": 0.7291,
        "dimensions": {
          "reasoning_depth": {
            "score": 1.0,
            "evidence": [
              "word_count=1328",
              "chain_markers=22",
              "ground_truth_coverage=4/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=4_hits",
              "philosophical=2_hits",
              "ethical=5_hits",
              "empathic=8_hits",
              "creative=3_hits",
              "meta-cognitive=8_hits",
              "systems=6_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4568,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.6067,
            "evidence": [
              "ethical_keywords=5",
              "frameworks=['utilitarian', 'deontological']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6742,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=7"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.75,
            "evidence": [
              "ground_truth=2/4",
              "numbers=14,proper_nouns=80"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.363,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 1362,
        "latency_ms": 48411.8
      }
    },
    "meta_03": {
      "SINGLE": {
        "composite": 0.3561,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.4183,
            "evidence": [
              "word_count=47",
              "chain_markers=0",
              "ground_truth_coverage=3/4"
            ],
            "penalties": [
              "response_too_short"
            ]
          },
          "perspective_diversity": {
            "score": 0.375,
            "evidence": [
              "empathic=2_hits"
            ],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.325,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.0933,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=[]"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.3833,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=1"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.4875,
            "evidence": [
              "ground_truth=2/4",
              "numbers=0,proper_nouns=1"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.275,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 48,
        "latency_ms": 78769.5
      },
      "MULTI": {
        "composite": 0.6666,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.8916,
            "evidence": [
              "word_count=762",
              "chain_markers=5",
              "ground_truth_coverage=3/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=4_hits",
              "philosophical=2_hits",
              "empathic=5_hits",
              "meta-cognitive=3_hits",
              "systems=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5383,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.3453,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5821,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.75,
            "evidence": [
              "ground_truth=2/4",
              "numbers=1,proper_nouns=41"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.232,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 765,
        "latency_ms": 78121.2
      },
      "MEMORY": {
        "composite": 0.6081,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.825,
            "evidence": [
              "word_count=857",
              "chain_markers=3",
              "ground_truth_coverage=3/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.725,
            "evidence": [
              "analytical=4_hits",
              "empathic=2_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5318,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.2403,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['virtue']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5799,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=3"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.75,
            "evidence": [
              "ground_truth=2/4",
              "numbers=3,proper_nouns=51"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.3104,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 869,
        "latency_ms": 151502.9
      },
      "CODETTE": {
        "composite": 0.7759,
        "dimensions": {
          "reasoning_depth": {
            "score": 1.0,
            "evidence": [
              "word_count=1254",
              "chain_markers=9",
              "ground_truth_coverage=4/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "empathic=9_hits",
              "meta-cognitive=6_hits",
              "systems=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.513,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.4853,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.8081,
            "evidence": [
              "novelty_markers=2",
              "perspectives_touched=4"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.875,
            "evidence": [
              "ground_truth=3/4",
              "numbers=15,proper_nouns=86"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.4796,
            "evidence": [
              "conversational_markers=2"
            ],
            "penalties": []
          }
        },
        "response_length": 1287,
        "latency_ms": 63046.5
      }
    },
    "adversarial_01": {
      "SINGLE": {
        "composite": 0.4143,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.4485,
            "evidence": [
              "word_count=57",
              "chain_markers=0",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.1,
            "evidence": [],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.7663,
            "evidence": [
              "transitions=2"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.0467,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=[]"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.25,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=0"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.8,
            "evidence": [
              "ground_truth=3/5",
              "numbers=2,proper_nouns=6"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.325,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 58,
        "latency_ms": 81259.4
      },
      "MULTI": {
        "composite": 0.6583,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9399,
            "evidence": [
              "word_count=683",
              "chain_markers=6",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.9,
            "evidence": [
              "analytical=5_hits",
              "philosophical=2_hits",
              "ethical=3_hits",
              "empathic=3_hits",
              "creative=2_hits",
              "systems=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4855,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.4177,
            "evidence": [
              "ethical_keywords=3",
              "frameworks=['utilitarian']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.605,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6,
            "evidence": [
              "ground_truth=3/5",
              "numbers=18,proper_nouns=48"
            ],
            "penalties": [
              "fell_into_1_traps"
            ]
          },
          "turing_naturalness": {
            "score": 0.4,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 702,
        "latency_ms": 43913.2
      },
      "MEMORY": {
        "composite": 0.7362,
        "dimensions": {
          "reasoning_depth": {
            "score": 1.0,
            "evidence": [
              "word_count=760",
              "chain_markers=7",
              "ground_truth_coverage=5/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=3_hits",
              "philosophical=2_hits",
              "ethical=3_hits",
              "empathic=4_hits",
              "meta-cognitive=5_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5297,
            "evidence": [
              "transitions=1"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.371,
            "evidence": [
              "ethical_keywords=3",
              "frameworks=['utilitarian']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5979,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.8,
            "evidence": [
              "ground_truth=3/5",
              "numbers=13,proper_nouns=51"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.6,
            "evidence": [
              "conversational_markers=2"
            ],
            "penalties": []
          }
        },
        "response_length": 780,
        "latency_ms": 58055.0
      },
      "CODETTE": {
        "composite": 0.6946,
        "dimensions": {
          "reasoning_depth": {
            "score": 1.0,
            "evidence": [
              "word_count=1081",
              "chain_markers=7",
              "ground_truth_coverage=5/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=6_hits",
              "philosophical=2_hits",
              "ethical=3_hits",
              "empathic=7_hits",
              "creative=3_hits",
              "meta-cognitive=6_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.476,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.5227,
            "evidence": [
              "ethical_keywords=3",
              "frameworks=['utilitarian', 'virtue']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5908,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=7"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6,
            "evidence": [
              "ground_truth=3/5",
              "numbers=31,proper_nouns=99"
            ],
            "penalties": [
              "fell_into_1_traps"
            ]
          },
          "turing_naturalness": {
            "score": 0.4235,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 1126,
        "latency_ms": 63211.7
      }
    },
    "adversarial_02": {
      "SINGLE": {
        "composite": 0.3488,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.3907,
            "evidence": [
              "word_count=36",
              "chain_markers=0",
              "ground_truth_coverage=2/3"
            ],
            "penalties": [
              "response_too_short"
            ]
          },
          "perspective_diversity": {
            "score": 0.1,
            "evidence": [],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.675,
            "evidence": [
              "transitions=1"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.0,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=[]"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.25,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=0"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.5958,
            "evidence": [
              "ground_truth=2/3",
              "numbers=1,proper_nouns=6"
            ],
            "penalties": [
              "fell_into_1_traps"
            ]
          },
          "turing_naturalness": {
            "score": 0.275,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 37,
        "latency_ms": 50882.9
      },
      "MULTI": {
        "composite": 0.6508,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.8997,
            "evidence": [
              "word_count=644",
              "chain_markers=3",
              "ground_truth_coverage=3/3"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 0.9,
            "evidence": [
              "analytical=4_hits",
              "ethical=4_hits",
              "empathic=4_hits",
              "creative=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5273,
            "evidence": [
              "transitions=1"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.518,
            "evidence": [
              "ethical_keywords=4",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.607,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=4"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6333,
            "evidence": [
              "ground_truth=2/3",
              "numbers=4,proper_nouns=92"
            ],
            "penalties": [
              "fell_into_1_traps"
            ]
          },
          "turing_naturalness": {
            "score": 0.1888,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 654,
        "latency_ms": 29184.0
      },
      "MEMORY": {
        "composite": 0.6265,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.8333,
            "evidence": [
              "word_count=741",
              "chain_markers=1",
              "ground_truth_coverage=3/3"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=2_hits",
              "ethical=2_hits",
              "empathic=2_hits",
              "meta-cognitive=4_hits",
              "systems=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.503,
            "evidence": [
              "transitions=1"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.4807,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5856,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.4667,
            "evidence": [
              "ground_truth=1/3",
              "numbers=2,proper_nouns=97"
            ],
            "penalties": [
              "fell_into_1_traps"
            ]
          },
          "turing_naturalness": {
            "score": 0.285,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 754,
        "latency_ms": 31008.3
      },
      "CODETTE": {
        "composite": 0.6379,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9333,
            "evidence": [
              "word_count=1099",
              "chain_markers=4",
              "ground_truth_coverage=3/3"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "philosophical=2_hits",
              "ethical=2_hits",
              "empathic=8_hits",
              "creative=2_hits",
              "meta-cognitive=6_hits",
              "systems=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.488,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.3757,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.7002,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=7"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.2667,
            "evidence": [
              "ground_truth=1/3",
              "numbers=16,proper_nouns=133"
            ],
            "penalties": [
              "fell_into_2_traps"
            ]
          },
          "turing_naturalness": {
            "score": 0.4547,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 1133,
        "latency_ms": 31760.6
      }
    },
    "adversarial_03": {
      "SINGLE": {
        "composite": 0.3255,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.4053,
            "evidence": [
              "word_count=45",
              "chain_markers=0",
              "ground_truth_coverage=2/3"
            ],
            "penalties": [
              "response_too_short"
            ]
          },
          "perspective_diversity": {
            "score": 0.1,
            "evidence": [],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.3959,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.0,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=[]"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.25,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=0"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6667,
            "evidence": [
              "ground_truth=1/3",
              "numbers=2,proper_nouns=12"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.325,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 46,
        "latency_ms": 118316.9
      },
      "MULTI": {
        "composite": 0.6617,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9999,
            "evidence": [
              "word_count=709",
              "chain_markers=7",
              "ground_truth_coverage=3/3"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=3_hits",
              "empathic=6_hits",
              "meta-cognitive=4_hits",
              "systems=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4823,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.3453,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.7088,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=4"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.4667,
            "evidence": [
              "ground_truth=1/3",
              "numbers=5,proper_nouns=97"
            ],
            "penalties": [
              "fell_into_1_traps"
            ]
          },
          "turing_naturalness": {
            "score": 0.2853,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 719,
        "latency_ms": 61933.9
      },
      "MEMORY": {
        "composite": 0.6007,
        "dimensions": {
          "reasoning_depth": {
            "score": 1.0,
            "evidence": [
              "word_count=755",
              "chain_markers=6",
              "ground_truth_coverage=3/3"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "ethical=2_hits",
              "empathic=5_hits",
              "meta-cognitive=2_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5594,
            "evidence": [
              "transitions=1",
              "tensions_acknowledged_and_resolved"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.329,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5987,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.1,
            "evidence": [
              "ground_truth=0/3",
              "numbers=5,proper_nouns=95"
            ],
            "penalties": [
              "fell_into_2_traps"
            ]
          },
          "turing_naturalness": {
            "score": 0.2906,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 768,
        "latency_ms": 65725.7
      },
      "CODETTE": {
        "composite": 0.6124,
        "dimensions": {
          "reasoning_depth": {
            "score": 1.0,
            "evidence": [
              "word_count=1151",
              "chain_markers=8",
              "ground_truth_coverage=3/3"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "empathic=8_hits",
              "meta-cognitive=6_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4835,
            "evidence": [
              "transitions=0",
              "tensions_acknowledged_and_resolved"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.2987,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5825,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=4"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.2667,
            "evidence": [
              "ground_truth=1/3",
              "numbers=18,proper_nouns=138"
            ],
            "penalties": [
              "fell_into_2_traps"
            ]
          },
          "turing_naturalness": {
            "score": 0.326,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 1185,
        "latency_ms": 62938.6
      }
    },
    "turing_01": {
      "SINGLE": {
        "composite": 0.2572,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.4639,
            "evidence": [
              "word_count=35",
              "chain_markers=1",
              "ground_truth_coverage=4/5"
            ],
            "penalties": [
              "response_too_short"
            ]
          },
          "perspective_diversity": {
            "score": 0.1,
            "evidence": [],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.325,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.0,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=[]"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.25,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=0"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.2375,
            "evidence": [
              "ground_truth=0/5",
              "numbers=0,proper_nouns=1"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.275,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 36,
        "latency_ms": 70310.6
      },
      "MULTI": {
        "composite": 0.6259,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.7733,
            "evidence": [
              "word_count=746",
              "chain_markers=1",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=3_hits",
              "ethical=2_hits",
              "empathic=6_hits",
              "meta-cognitive=4_hits",
              "systems=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5141,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.294,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5754,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6,
            "evidence": [
              "ground_truth=1/5",
              "numbers=1,proper_nouns=53"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.384,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 752,
        "latency_ms": 57597.1
      },
      "MEMORY": {
        "composite": 0.6294,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.8066,
            "evidence": [
              "word_count=820",
              "chain_markers=2",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "empathic=7_hits",
              "meta-cognitive=5_hits",
              "systems=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5097,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.252,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5845,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=4"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.6,
            "evidence": [
              "ground_truth=1/5",
              "numbers=1,proper_nouns=61"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.3872,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 829,
        "latency_ms": 66757.2
      },
      "CODETTE": {
        "composite": 0.7014,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9333,
            "evidence": [
              "word_count=1178",
              "chain_markers=4",
              "ground_truth_coverage=5/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "philosophical=3_hits",
              "ethical=3_hits",
              "empathic=9_hits",
              "creative=2_hits",
              "meta-cognitive=5_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4872,
            "evidence": [
              "transitions=0",
              "tensions_acknowledged_and_resolved"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.5227,
            "evidence": [
              "ethical_keywords=3",
              "frameworks=['utilitarian', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5776,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=7"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.7,
            "evidence": [
              "ground_truth=2/5",
              "numbers=14,proper_nouns=102"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.4773,
            "evidence": [
              "conversational_markers=2"
            ],
            "penalties": []
          }
        },
        "response_length": 1211,
        "latency_ms": 48520.1
      }
    },
    "turing_02": {
      "SINGLE": {
        "composite": 0.2566,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.3126,
            "evidence": [
              "word_count=20",
              "chain_markers=0",
              "ground_truth_coverage=2/4"
            ],
            "penalties": [
              "response_too_short"
            ]
          },
          "perspective_diversity": {
            "score": 0.1,
            "evidence": [],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.325,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.0467,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=[]"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.25,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=0"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.2375,
            "evidence": [
              "ground_truth=0/4",
              "numbers=0,proper_nouns=1"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.525,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 20,
        "latency_ms": 41390.5
      },
      "MULTI": {
        "composite": 0.6614,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.7583,
            "evidence": [
              "word_count=727",
              "chain_markers=1",
              "ground_truth_coverage=3/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=4_hits",
              "philosophical=2_hits",
              "ethical=4_hits",
              "empathic=2_hits",
              "meta-cognitive=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.466,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.518,
            "evidence": [
              "ethical_keywords=4",
              "frameworks=['utilitarian', 'virtue']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6955,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.625,
            "evidence": [
              "ground_truth=1/4",
              "numbers=1,proper_nouns=45"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.4,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 735,
        "latency_ms": 65700.7
      },
      "MEMORY": {
        "composite": 0.6849,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9,
            "evidence": [
              "word_count=858",
              "chain_markers=3",
              "ground_truth_coverage=4/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=6_hits",
              "philosophical=2_hits",
              "empathic=6_hits",
              "meta-cognitive=5_hits",
              "systems=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4833,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.3453,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.6944,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.625,
            "evidence": [
              "ground_truth=1/4",
              "numbers=1,proper_nouns=53"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.5,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 865,
        "latency_ms": 27791.4
      },
      "CODETTE": {
        "composite": 0.7079,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9333,
            "evidence": [
              "word_count=1130",
              "chain_markers=4",
              "ground_truth_coverage=4/4"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "philosophical=2_hits",
              "ethical=2_hits",
              "empathic=6_hits",
              "creative=2_hits",
              "meta-cognitive=5_hits",
              "systems=5_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.4729,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.4223,
            "evidence": [
              "ethical_keywords=2",
              "frameworks=['utilitarian']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.704,
            "evidence": [
              "novelty_markers=1",
              "perspectives_touched=7"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.75,
            "evidence": [
              "ground_truth=2/4",
              "numbers=15,proper_nouns=92"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.4,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 1159,
        "latency_ms": 45160.4
      }
    },
    "turing_03": {
      "SINGLE": {
        "composite": 0.3204,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.3697,
            "evidence": [
              "word_count=31",
              "chain_markers=0",
              "ground_truth_coverage=3/5"
            ],
            "penalties": [
              "response_too_short"
            ]
          },
          "perspective_diversity": {
            "score": 0.375,
            "evidence": [
              "meta-cognitive=2_hits"
            ],
            "penalties": [
              "single_perspective_only"
            ]
          },
          "coherence": {
            "score": 0.325,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.1517,
            "evidence": [
              "ethical_keywords=0",
              "frameworks=['virtue']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.3833,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=1"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.275,
            "evidence": [
              "ground_truth=0/5",
              "numbers=0,proper_nouns=2"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.275,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 31,
        "latency_ms": 102855.4
      },
      "MULTI": {
        "composite": 0.6871,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9066,
            "evidence": [
              "word_count=750",
              "chain_markers=5",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=5_hits",
              "philosophical=3_hits",
              "empathic=3_hits",
              "creative=4_hits",
              "meta-cognitive=5_hits",
              "systems=2_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5365,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.3453,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.8217,
            "evidence": [
              "novelty_markers=2",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.7,
            "evidence": [
              "ground_truth=2/5",
              "numbers=1,proper_nouns=45"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.125,
            "evidence": [
              "conversational_markers=0"
            ],
            "penalties": []
          }
        },
        "response_length": 755,
        "latency_ms": 46791.9
      },
      "MEMORY": {
        "composite": 0.6769,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.9066,
            "evidence": [
              "word_count=808",
              "chain_markers=5",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=4_hits",
              "philosophical=3_hits",
              "empathic=6_hits",
              "meta-cognitive=6_hits",
              "systems=3_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.5274,
            "evidence": [
              "transitions=0"
            ],
            "penalties": []
          },
          "ethical_coverage": {
            "score": 0.3453,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5965,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=5"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.7,
            "evidence": [
              "ground_truth=2/5",
              "numbers=2,proper_nouns=52"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.375,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 817,
        "latency_ms": 48729.0
      },
      "CODETTE": {
        "composite": 0.69,
        "dimensions": {
          "reasoning_depth": {
            "score": 0.94,
            "evidence": [
              "word_count=1130",
              "chain_markers=8",
              "ground_truth_coverage=4/5"
            ],
            "penalties": []
          },
          "perspective_diversity": {
            "score": 1.0,
            "evidence": [
              "analytical=8_hits",
              "philosophical=3_hits",
              "empathic=7_hits",
              "creative=3_hits",
              "meta-cognitive=8_hits",
              "systems=4_hits"
            ],
            "penalties": []
          },
          "coherence": {
            "score": 0.493,
            "evidence": [
              "transitions=0"
            ],
            "penalties": [
              "contradictions_without_resolution"
            ]
          },
          "ethical_coverage": {
            "score": 0.392,
            "evidence": [
              "ethical_keywords=1",
              "frameworks=['utilitarian', 'virtue', 'care']"
            ],
            "penalties": []
          },
          "novelty": {
            "score": 0.5928,
            "evidence": [
              "novelty_markers=0",
              "perspectives_touched=6"
            ],
            "penalties": []
          },
          "factual_grounding": {
            "score": 0.7,
            "evidence": [
              "ground_truth=2/5",
              "numbers=17,proper_nouns=94"
            ],
            "penalties": []
          },
          "turing_naturalness": {
            "score": 0.4491,
            "evidence": [
              "conversational_markers=1"
            ],
            "penalties": []
          }
        },
        "response_length": 1160,
        "latency_ms": 111463.3
      }
    }
  }
}