from __future__ import annotations
from pathlib import Path
import json
import math
import numpy as np
import pandas as pd
from .utils import ROOT, set_seed


def _safe_minmax(x: pd.Series, eps: float = 1e-6) -> pd.Series:
    """Min-max normalise a series without ever dividing by zero.

    Values are first coerced to numeric (unparseable entries become NaN).
    The result is ``(x - min) / (max - min + eps)``; NaN entries stay NaN and
    the maximum maps to a value slightly below 1 because of the added ``eps``.
    When the spread is at most ``eps`` or is not finite (constant, all-NaN,
    empty, or infinite data) an all-zero float series is returned instead.

    Args:
        x: Input values; anything ``pd.to_numeric`` can coerce.
        eps: Smallest spread treated as non-constant; also added to the
            denominator.

    Returns:
        A float series carrying the same index as ``x``.
    """
    x = pd.to_numeric(x, errors="coerce")
    # Series.min/max skip NaN and yield NaN for empty or all-NaN data, whereas
    # np.nanmin/np.nanmax raise ValueError on empty input and warn on all-NaN.
    xmin, xmax = float(x.min()), float(x.max())
    span = xmax - xmin
    if not math.isfinite(span) or span <= eps:
        return pd.Series(np.zeros(len(x), dtype=float), index=x.index)
    return (x - xmin) / (span + eps)


def _load_meta(path: Path) -> dict:
    """Return the mixing parameters from the optional ``meta.yaml``.

    The result always has the keys ``lambda_D`` (float) and ``beta`` (list of
    three floats).  Defaults are used when the file does not exist, and for
    any key that is absent or whose value cannot be interpreted.

    The file is read with ``yaml.safe_load`` when PyYAML is importable and the
    document is a mapping.  Otherwise a minimal line-based parser looks for
    ``lambda_D: <number>`` and ``beta: [a, b, c]`` lines, ignoring inline
    ``#`` comments.  Both paths apply the same validation.

    Args:
        path: Location of the metadata file.

    Returns:
        A new dict ``{"lambda_D": float, "beta": [float, float, float]}``.
    """
    meta = {"lambda_D": 0.5, "beta": [0.5, 0.3, 0.2]}
    if not path.exists():
        return meta

    def _as_lambda(value):
        # None signals "unusable, keep the default".
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    def _as_beta(value):
        # Exactly three numeric weights are required, as in the line parser.
        if not isinstance(value, (list, tuple)) or len(value) != 3:
            return None
        try:
            return [float(v) for v in value]
        except (TypeError, ValueError):
            return None

    coerce = {"lambda_D": _as_lambda, "beta": _as_beta}

    try:
        import yaml  # optional dependency; used only when installed

        with path.open("r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f) or {}
        if not isinstance(loaded, dict):
            raise TypeError("meta.yaml must contain a mapping")
        found = [(key, loaded[key]) for key in meta if key in loaded]
    except Exception:
        # Deliberately broad: PyYAML missing, a YAML syntax error, or a
        # non-mapping document all fall back to the line-based parser.
        found = []
        for line in path.read_text(encoding="utf-8").splitlines():
            key, sep, rest = line.strip().partition(":")
            key = key.strip()
            if not sep or key not in meta:
                continue
            rest = rest.split("#", 1)[0].strip()  # drop inline comments
            if key == "beta":
                # e.g. beta: [0.5, 0.3, 0.2]
                rest = [v for v in rest.strip("[]").split(",") if v.strip()]
            found.append((key, rest))

    for key, raw in found:
        value = coerce[key](raw)
        if value is not None:
            meta[key] = value
    return meta


def compute(
    features_path: Path = ROOT / "reports/features.parquet",
    out_path: Path = ROOT / "reports/indices.parquet",
) -> None:
    """Convert ``features.parquet`` into per-row indices and write them to parquet.

    Required input columns: ``session_id``, ``turn_id``, ``role``,
    ``avg_token``, ``punct_ratio``, ``anchor_density``.

    Output columns, in addition to ``session_id``, ``turn_id`` and ``role``:
      - ``D0``, ``Dt`` : normalised pseudo-distances (placeholder). ``Dt`` is
        the min-max scaled ``punct_ratio``; ``D0`` is its centred 7-row
        rolling median, rescaled to 0-1.
      - ``E_score``    : ``1 - Dt / (D0 + eps)``, clipped to [0, 1].
      - ``H_t``        : ``1 - |Dt - D0|``, clipped to [0, 1].
      - ``A_t``        : ``anchor_density`` (non-numeric -> 0, clipped to [0, 1]).
      - ``E_mix``      : ``(1 - lambda_D) * E_score + lambda_D * (b1 * A_t +
        b2 * H_t + b3 * (1 - Dt))``, clipped to [0, 1], with ``lambda_D`` and
        ``beta = [b1, b2, b3]`` read from ``data/meta.yaml`` under ``ROOT``.

    Args:
        features_path: Parquet file holding the input features.
        out_path: Destination parquet file; parent directories are created.

    Raises:
        FileNotFoundError: If ``features_path`` does not exist.
        KeyError: If a required column is missing from the features.
    """
    # Fail early, with a hint on how to generate the file, before any work.
    if not features_path.exists():
        raise FileNotFoundError(
            f"{features_path} が見つかりません。まず 'python -m src.features' を実行して features.parquet を生成してください。"
        )
    set_seed()

    # Load the features.
    df = pd.read_parquet(features_path)

    # The id columns are needed both for ordering and for the output.
    for col in ("session_id", "turn_id", "role"):
        if col not in df:
            raise KeyError(f"required column missing in features: {col}")

    df = df.sort_values(["session_id", "turn_id"]).reset_index(drop=True)

    # Feature columns check.
    # NOTE(review): avg_token is required here but not read by any index
    # computed below - confirm whether it is still needed.
    req_cols = ("avg_token", "punct_ratio", "anchor_density")
    missing = [c for c in req_cols if c not in df.columns]
    if missing:
        raise KeyError(f"required feature columns missing: {missing}")

    # "Current distance" Dt (0-1); its rolling median serves as the baseline D0.
    # NOTE(review): the window runs over the whole sorted frame, so it spans
    # session boundaries - confirm this is intended.
    Dt = _safe_minmax(df["punct_ratio"])
    D0 = Dt.rolling(7, center=True, min_periods=1).median()
    # Rescale to 0-1 again; max() guards the division when D0 is constant.
    D0 = (D0 - D0.min()) / max(1e-6, (D0.max() - D0.min()))

    A = pd.to_numeric(df["anchor_density"], errors="coerce").fillna(0.0).clip(0, 1)

    eps = 1e-6
    E = (1.0 - (Dt / (D0 + eps))).clip(0.0, 1.0)  # closer to baseline -> nearer 1
    H = (1.0 - (Dt - D0).abs()).clip(0.0, 1.0)    # small deviation -> nearer 1

    # Mixed score (optional, driven by meta.yaml).
    meta = _load_meta(ROOT / "data" / "meta.yaml")
    lam = float(meta.get("lambda_D", 0.5))
    b1, b2, b3 = (meta.get("beta") or [0.5, 0.3, 0.2])
    E_mix = ((1 - lam) * E + lam * (b1 * A + b2 * H + b3 * (1.0 - Dt))).clip(0.0, 1.0)

    out = df[["session_id", "turn_id", "role"]].copy()
    out["D0"] = D0
    out["Dt"] = Dt
    out["E_score"] = E
    out["H_t"] = H
    out["A_t"] = A
    out["E_mix"] = E_mix

    # Make sure the output directory exists before writing.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out.to_parquet(out_path, index=False)
    print(f"indices -> {out_path} ({len(out)} rows)")


# Script entry point: build the indices using the default input/output paths.
if __name__ == "__main__":
    compute()
