#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Diagonal 2-loop Standard-Model diagnostic for the down-type spectrum.

This script accompanies Dual_Koide_v20260503_6.tex (2026-05-03 phase
of the dual-Koide manuscript).  It provides an independent 2-loop SM RGE
solver that complements the 4-loop QCD-only baseline of Script_S1 /
Script_S3 / Script_S7.

Implementation summary
----------------------
1. QCD-only sub-limit (regression test): when invoked with qcd_only=True,
   the script falls back to the 4-loop QCD continuous-matching running
   identical to Script_S1 / Script_S7 V1, and the resulting K_inv(m_b)
   reproduces the QCD-only baseline value 0.667416 to >= 6 dp.  This is the most
   important sanity check of the new implementation.

2. Diagonal 2-loop SM mode: integrates the coupled (g_1, g_2, g_3, y_t, y_b,
   y_tau, lambda) RGE plus the running m_d(mu), m_s(mu), m_b(mu) at full
   2-loop order from mu = M_Z up to mu_max = 10^16 GeV.  The boundary
   conditions at mu = M_Z are obtained by 4-loop QCD-only running of the
   PDG 2024 inputs from their quoted reference scales (m_d, m_s at 2 GeV;
   m_b at m_b(m_b)) up to M_Z; the gauge couplings and the Higgs self
   coupling are fixed at their PDG / Higgs-mass-implied values at M_Z.
   The top Yukawa is fixed at m_t(m_t) = 162.5 GeV (Script_S1 convention)
   and matched downward to M_Z by 1-loop matching at mu = m_t.

3. Integration: scipy.integrate.solve_ivp with adaptive RK45,
   rtol=1e-9, atol=1e-12.  The diagonal 2-loop SM scan covers mu in [m_b, 1e16] GeV
   on a 60-point logarithmic grid.

4. Output:
   - Figure_5.png: 2-panel plot of K_inv(mu) (QCD-only vs diagonal 2-loop SM)
     the inverse-tuple Brannen shape parameters T_d_inv^fit, phi_d_inv^fit
     under diagonal 2-loop SM running.
   - Script_5_cache.npz: numpy-savez cache of the integration arrays so
     that subsequent invocations replay the figure without re-integrating.
   - stdout summary: K_inv at the illustrative scales {m_b, M_Z, 1 TeV,
     10 TeV, 1 PeV, 1e16 GeV} for both QCD-only and diagonal 2-loop SM, used to
     populate Table S.6 of the manuscript.

References for the SM RGE coefficients
--------------------------------------
- Machacek, Vaughn, Nucl. Phys. B 222 (1983) 83;
                    B 236 (1984) 221; B 249 (1985) 70.
- Mihaila, Salomon, Steinhauser, Phys. Lett. B 730 (2014) 161;
                                  Nucl. Phys. B 875 (2013) 552.
- Chetyrkin, Zoller, Phys. Lett. B 715 (2012) 191;
                     Nucl. Phys. B 862 (2012) 871.
- Bezrukov et al., JHEP 10 (2012) 140.
- Buttazzo, Degrassi, Giardino, Giudice, Sala, Salvio, Strumia,
  JHEP 12 (2013) 089 (= 1307.3536) Appendix B (compact 2-loop SM RGE).

The 2-loop coefficients adopted here follow Buttazzo et al. 2013
(1307.3536), with the convention t = ln mu, beta_X = (16 pi^2) dX/dt for
1-loop and (16 pi^2)^2 dX/dt for the 2-loop pieces.  GUT normalization
is used for U(1)_Y: g_1 = sqrt(5/3) g'.  The Yukawa couplings y_t, y_b,
y_tau are dimensionless; lambda is the SM Higgs quartic in the
convention V_H = lambda (H^dag H)^2 / 4 (SM normalization).
"""
from __future__ import annotations

import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import solve_ivp
from scipy.optimize import minimize


# =============================================================================
# Constants (PDG 2024)
# =============================================================================

MZ        = 91.1876        # GeV
MW        = 80.377         # GeV
MH        = 125.25         # GeV
MT_POLE   = 172.57         # GeV  (PDG 2024 top pole mass)
MT_MT     = 162.5          # GeV  (m_t(m_t) MS-bar; Script_S1 convention)
MB_MB     = 4.183          # GeV  (m_b(m_b) MS-bar)
MC_MC     = 1.273          # GeV  (m_c(m_c) MS-bar)
MTAU      = 1.77686        # GeV  (tau pole mass; m_tau(m_tau) MS-bar within ~10 MeV)

ALPHA_S_MZ   = 0.1180      # PDG 2024
ALPHA_EM_MZ  = 1.0 / 127.951
SIN2_THETAW  = 0.23121     # on-shell PDG 2024
VEV          = 246.21965   # GeV (Higgs vev v = (sqrt(2) G_F)^{-1/2})

# Mass-related inputs (in MeV) for the down-type masses
MD_INPUT = 4.7             # m_d at mu = 2 GeV
MS_INPUT = 93.5            # m_s at mu = 2 GeV
MB_INPUT = 4183.0          # m_b at mu = m_b(m_b)
MU_LIGHT = 2.0             # GeV: PDG quoted scale for m_d, m_s

PHI_BENCH = 1.0 / 3.0
COS_DELTA_BENCH = 0.5 * np.cos(3.0 * np.pi / 8.0)


# =============================================================================
# Section 1: 4-loop QCD running engine (matches Script_S1 / Script_S7 V1)
#
#   Used both for the QCD-only sub-limit regression test and for setting
#   the M_Z boundary conditions of the diagonal 2-loop SM mode.
# =============================================================================


def _qcd_beta(nf):
    zeta3 = 1.2020569031595942
    b0 = (11.0 - 2.0 * nf / 3.0) / 4.0
    b1 = (102.0 - 38.0 * nf / 3.0) / 16.0
    b2 = (2857.0 / 2.0 - 5033.0 * nf / 18.0
          + 325.0 * nf**2 / 54.0) / 64.0
    b3 = ((149753.0 / 6.0 + 3564.0 * zeta3)
          - (1078361.0 / 162.0 + 6508.0 * zeta3 / 27.0) * nf
          + (50065.0 / 162.0 + 6472.0 * zeta3 / 81.0) * nf**2
          + 1093.0 * nf**3 / 729.0) / 256.0
    return b0, b1, b2, b3


def _qcd_gamma_m(nf):
    zeta3 = 1.2020569031595942
    zeta4 = np.pi**4 / 90.0
    zeta5 = 1.0369277551433699
    g0 = 1.0
    g1 = (202.0 / 3.0 - 20.0 * nf / 9.0) / 16.0
    g2 = (1249.0 - (2216.0 / 27.0 + 160.0 * zeta3 / 3.0) * nf
          - 140.0 * nf**2 / 81.0) / 64.0
    g3 = ((4603055.0 / 162.0 + 135680.0 * zeta3 / 27.0
           - 8800.0 * zeta5)
          - (91723.0 / 27.0 + 34192.0 * zeta3 / 9.0
             - 880.0 * zeta4 - 18400.0 * zeta5 / 9.0) * nf
          + (5242.0 / 243.0 + 800.0 * zeta3 / 9.0
             - 160.0 * zeta4 / 3.0) * nf**2
          + (-332.0 / 243.0 + 64.0 * zeta3 / 27.0) * nf**3
          ) / 256.0
    return g0, g1, g2, g3


def _qcd_run_alpha_segment(a_s_start, mu_a, mu_b, nf):
    if abs(mu_b - mu_a) / max(mu_a, 1e-30) < 1e-14:
        return a_s_start
    t_end = np.log(mu_b**2 / mu_a**2)
    b0, b1, b2, b3 = _qcd_beta(nf)
    sol = solve_ivp(lambda t, y: [-(b0 * y[0]**2 + b1 * y[0]**3
                                    + b2 * y[0]**4 + b3 * y[0]**5)],
                    [0.0, t_end], [a_s_start],
                    method="RK45", rtol=1e-12, atol=1e-15,
                    max_step=abs(t_end) / 10)
    return float(sol.y[0, -1])


def _qcd_run_mass_segment(m_start, a_s_start, mu_a, mu_b, nf):
    if abs(mu_b - mu_a) / max(mu_a, 1e-30) < 1e-14:
        return m_start, a_s_start
    t_end = np.log(mu_b**2 / mu_a**2)
    b0, b1, b2, b3 = _qcd_beta(nf)
    g0, g1, g2, g3 = _qcd_gamma_m(nf)

    def rhs(t, y):
        a, lnm = y
        da = -(b0 * a**2 + b1 * a**3 + b2 * a**4 + b3 * a**5)
        dlnm = -(g0 * a + g1 * a**2 + g2 * a**3 + g3 * a**4)
        return [da, dlnm]

    sol = solve_ivp(rhs, [0.0, t_end], [a_s_start, np.log(m_start)],
                    method="RK45", rtol=1e-12, atol=1e-15,
                    max_step=abs(t_end) / 10)
    return float(np.exp(sol.y[1, -1])), float(sol.y[0, -1])


def _qcd_get_nf(mu):
    if mu >= MT_MT:
        return 6
    if mu >= MB_MB:
        return 5
    if mu >= MC_MC:
        return 4
    return 3


def qcd_alpha_s(mu):
    """alpha_s/pi at scale mu, 4-loop, continuous matching."""
    a = ALPHA_S_MZ / np.pi
    if mu >= MZ:
        if mu < MT_MT:
            return _qcd_run_alpha_segment(a, MZ, mu, nf=5)
        a = _qcd_run_alpha_segment(a, MZ, MT_MT, nf=5)
        return _qcd_run_alpha_segment(a, MT_MT, mu, nf=6)
    if mu >= MB_MB:
        return _qcd_run_alpha_segment(a, MZ, mu, nf=5)
    a = _qcd_run_alpha_segment(a, MZ, MB_MB, nf=5)
    if mu >= MC_MC:
        return _qcd_run_alpha_segment(a, MB_MB, mu, nf=4)
    a = _qcd_run_alpha_segment(a, MB_MB, MC_MC, nf=4)
    return _qcd_run_alpha_segment(a, MC_MC, mu, nf=3)


def qcd_running_mass(m_ref, mu_ref, mu_target):
    """Quark mass at mu_target obtained by 4-loop QCD running with
    continuous matching from (m_ref, mu_ref).  Identical convention to
    Script_S1 / Script_S7 V1."""
    a_ref = qcd_alpha_s(mu_ref)
    thresholds = [MC_MC, MB_MB, MT_MT]
    if mu_target > mu_ref:
        waypoints = [mu_ref] + [t for t in thresholds
                                if mu_ref < t < mu_target] + [mu_target]
    else:
        waypoints = [mu_ref] + [t for t in reversed(thresholds)
                                if mu_target < t < mu_ref] + [mu_target]
    m, a = m_ref, a_ref
    for i in range(len(waypoints) - 1):
        mu_a, mu_b = waypoints[i], waypoints[i + 1]
        nf = _qcd_get_nf(0.5 * (mu_a + mu_b))
        m, a = _qcd_run_mass_segment(m, a, mu_a, mu_b, nf)
    return m


# =============================================================================
# Section 2: Diagonal 2-loop SM RGE
#
#   State vector y = (g_1, g_2, g_3, y_t, y_b, y_tau, lambda,
#                     ln m_d, ln m_s, ln m_b)
#   in GUT normalization for g_1 (g_1^2 = 5/3 g'^2).  All masses in MeV.
#
#   The 2-loop coefficients follow Buttazzo et al. 1307.3536 Appendix B
#   (and references therein to Machacek-Vaughn / Mihaila-Salomon-
#   Steinhauser).  We retain the dominant gauge x gauge, gauge x Yukawa,
#   Yukawa^4, and lambda contributions.
# =============================================================================


# ----- 1-loop coefficients ---------------------------------------------------

def _b1_gauge(nf=6):
    """1-loop gauge coefficients in dgi/dt = (b_i^(1) gi^3) / 16pi^2."""
    return (41.0 / 10.0, -19.0 / 6.0, -7.0)  # SM with full Higgs doublet


# 2-loop gauge (b_ij matrix); Yukawa contributions (c_i^a)
B2_GAUGE = np.array([
    [199.0 / 50.0, 27.0 / 10.0, 44.0 / 5.0],
    [9.0 / 10.0,   35.0 / 6.0,  12.0      ],
    [11.0 / 10.0,  9.0 / 2.0,  -26.0      ],
])  # b_ij such that dgi/dt|_2 = gi^3 sum_j b_ij g_j^2 / (16pi^2)^2 + Yukawa terms

# Yukawa contributions to 2-loop gauge:  - c_i^t y_t^2 - c_i^b y_b^2 - c_i^tau y_tau^2
#   For SM in GUT normalization:
#     dg_1/dt|_2 += -g_1^3 * [ (17/10) y_t^2 + (1/2) y_b^2 + (3/2) y_tau^2 ] / (16pi^2)^2
#     dg_2/dt|_2 += -g_2^3 * [ (3/2)  y_t^2 + (3/2) y_b^2 + (1/2) y_tau^2 ] / (16pi^2)^2
#     dg_3/dt|_2 += -g_3^3 * [  2     y_t^2 +  2    y_b^2                    ] / (16pi^2)^2
C_GAUGE_YUK = np.array([
    [17.0 / 10.0, 1.0 / 2.0, 3.0 / 2.0],
    [3.0 / 2.0,   3.0 / 2.0, 1.0 / 2.0],
    [2.0,         2.0,       0.0      ],
])


def sm_rhs(t, y, qcd_only=False):
    """RHS for the diagonal 2-loop SM RGE state vector.

    State vector y[0..9]:
      y[0] = g_1, y[1] = g_2, y[2] = g_3,
      y[3] = y_t, y[4] = y_b, y[5] = y_tau,
      y[6] = lambda (Higgs quartic),
      y[7] = ln y_d, y[8] = ln y_s, y[9] = ln y_b.

    Note on conventions: y[7..9] are the LOG of the down-type Yukawas
    (treated as independent variables for numerical convenience), with
    boundary condition y_q(M_Z) = sqrt(2) * m_q(M_Z) / v.  The "running
    mass" m_q(mu) used downstream is then identified with y_q(mu) via
    the same vev factor; for K_inv (which is homogeneous of degree 0
    under a common rescaling of all three masses) the vev cancels and
    the K_inv extracted from y[7..9] is the physical SM K_inv(mu).
    Tracking the Yukawas (not the running masses) avoids subtracting
    the universal Higgs wave-function term gamma_H^(2) from the b-quark
    2-loop anomalous dimension while leaving it in for the light
    quarks; that asymmetric subtraction would be a numerical bug.
    """
    g1, g2, g3, yt, yb, ytau, lam = y[0:7]
    inv16pi2 = 1.0 / (16.0 * np.pi**2)

    if qcd_only:
        # Not used; QCD-only running goes through qcd_running_mass.
        return [0.0] * len(y)

    # ---------- 1-loop gauge coefficients -----------------------------------
    b1_1, b1_2, b1_3 = _b1_gauge()
    dg1_1 = b1_1 * g1**3
    dg2_1 = b1_2 * g2**3
    dg3_1 = b1_3 * g3**3

    # ---------- 2-loop gauge: g^5 + Yukawa back-reaction --------------------
    g_sq = np.array([g1**2, g2**2, g3**2])
    dg1_2 = g1**3 * (B2_GAUGE[0, 0] * g_sq[0] + B2_GAUGE[0, 1] * g_sq[1]
                     + B2_GAUGE[0, 2] * g_sq[2]
                     - C_GAUGE_YUK[0, 0] * yt**2
                     - C_GAUGE_YUK[0, 1] * yb**2
                     - C_GAUGE_YUK[0, 2] * ytau**2)
    dg2_2 = g2**3 * (B2_GAUGE[1, 0] * g_sq[0] + B2_GAUGE[1, 1] * g_sq[1]
                     + B2_GAUGE[1, 2] * g_sq[2]
                     - C_GAUGE_YUK[1, 0] * yt**2
                     - C_GAUGE_YUK[1, 1] * yb**2
                     - C_GAUGE_YUK[1, 2] * ytau**2)
    dg3_2 = g3**3 * (B2_GAUGE[2, 0] * g_sq[0] + B2_GAUGE[2, 1] * g_sq[1]
                     + B2_GAUGE[2, 2] * g_sq[2]
                     - C_GAUGE_YUK[2, 0] * yt**2
                     - C_GAUGE_YUK[2, 1] * yb**2
                     - C_GAUGE_YUK[2, 2] * ytau**2)

    dg1 = inv16pi2 * dg1_1 + inv16pi2**2 * dg1_2
    dg2 = inv16pi2 * dg2_1 + inv16pi2**2 * dg2_2
    dg3 = inv16pi2 * dg3_1 + inv16pi2**2 * dg3_2

    # ---------- 1-loop Yukawa anomalous dimensions --------------------------
    # gamma_yq such that dy_q/dt = y_q gamma_yq / 16pi^2 + 2-loop / (16pi^2)^2
    g_t1 = (9.0 / 2.0) * yt**2 + (3.0 / 2.0) * yb**2 + ytau**2 \
        - (17.0 / 20.0) * g1**2 - (9.0 / 4.0) * g2**2 - 8.0 * g3**2
    g_b1 = (9.0 / 2.0) * yb**2 + (3.0 / 2.0) * yt**2 + ytau**2 \
        - (1.0 / 4.0) * g1**2 - (9.0 / 4.0) * g2**2 - 8.0 * g3**2
    g_tau1 = (5.0 / 2.0) * ytau**2 + 3.0 * yt**2 + 3.0 * yb**2 \
        - (9.0 / 4.0) * g1**2 - (9.0 / 4.0) * g2**2

    # Down-type light-quark (y_q^2 -> 0) Yukawa anomalous dimension
    g_qlight1 = (3.0 / 2.0) * yt**2 + ytau**2 \
        - (1.0 / 4.0) * g1**2 - (9.0 / 4.0) * g2**2 - 8.0 * g3**2

    # ---------- 2-loop Yukawa (dominant terms) ------------------------------
    # Buttazzo et al. 1307.3536 Eqs. B.2-B.4 (truncated to the dominant
    # y^4, y^2 g^2, g^4, lambda y^2, lambda^2 contributions).
    g_t2 = (
        - 12.0 * yt**4 - (11.0 / 4.0) * yt**2 * yb**2 - (5.0 / 4.0) * yb**4
        - (9.0 / 4.0) * yt**2 * ytau**2 - (5.0 / 4.0) * yb**2 * ytau**2
        - (9.0 / 4.0) * ytau**4
        + yt**2 * ((393.0 / 80.0) * g1**2 + (225.0 / 16.0) * g2**2
                   + 36.0 * g3**2)
        + yb**2 * ((7.0 / 80.0) * g1**2 + (99.0 / 16.0) * g2**2
                   + 4.0 * g3**2)
        + ytau**2 * ((15.0 / 8.0) * g1**2 + (15.0 / 8.0) * g2**2)
        + (1187.0 / 600.0) * g1**4 - (9.0 / 20.0) * g1**2 * g2**2
        + (19.0 / 15.0) * g1**2 * g3**2 - (23.0 / 4.0) * g2**4
        + 9.0 * g2**2 * g3**2 - 108.0 * g3**4
        + (3.0 / 2.0) * lam**2 - 6.0 * lam * yt**2
    )
    g_b2 = (
        - 12.0 * yb**4 - (11.0 / 4.0) * yt**2 * yb**2 - (5.0 / 4.0) * yt**4
        - (9.0 / 4.0) * yb**2 * ytau**2 - (5.0 / 4.0) * yt**2 * ytau**2
        - (9.0 / 4.0) * ytau**4
        + yb**2 * ((237.0 / 80.0) * g1**2 + (225.0 / 16.0) * g2**2
                   + 36.0 * g3**2)
        + yt**2 * ((91.0 / 80.0) * g1**2 + (99.0 / 16.0) * g2**2
                   + 4.0 * g3**2)
        + ytau**2 * ((15.0 / 8.0) * g1**2 + (15.0 / 8.0) * g2**2)
        - (127.0 / 600.0) * g1**4 - (27.0 / 20.0) * g1**2 * g2**2
        + (31.0 / 15.0) * g1**2 * g3**2 - (23.0 / 4.0) * g2**4
        + 9.0 * g2**2 * g3**2 - 108.0 * g3**4
        + (3.0 / 2.0) * lam**2 - 6.0 * lam * yb**2
    )
    g_tau2 = (
        - 9.0 * yt**2 * ytau**2 - 9.0 * yb**2 * ytau**2 - 3.0 * ytau**4 / 2.0
        + ytau**2 * ((537.0 / 80.0) * g1**2 + (165.0 / 16.0) * g2**2)
        + (1371.0 / 200.0) * g1**4 + (27.0 / 20.0) * g1**2 * g2**2
        - (23.0 / 4.0) * g2**4
        - 6.0 * lam * ytau**2 + (3.0 / 2.0) * lam**2
        - 9.0 * yt**4 / 4.0 - 9.0 * yb**4 / 4.0
    )
    # Down-type light-quark 2-loop Yukawa anomalous dimension (y_q^2 -> 0):
    g_qlight2 = (
        - (5.0 / 4.0) * yt**4
        - (9.0 / 4.0) * yt**2 * ytau**2 - (9.0 / 4.0) * ytau**4
        + yt**2 * ((91.0 / 80.0) * g1**2 + (99.0 / 16.0) * g2**2
                   + 4.0 * g3**2)
        + ytau**2 * ((15.0 / 8.0) * g1**2 + (15.0 / 8.0) * g2**2)
        - (127.0 / 600.0) * g1**4 - (27.0 / 20.0) * g1**2 * g2**2
        + (31.0 / 15.0) * g1**2 * g3**2 - (23.0 / 4.0) * g2**4
        + 9.0 * g2**2 * g3**2 - 108.0 * g3**4
        + (3.0 / 2.0) * lam**2
    )

    dyt = inv16pi2 * yt * g_t1 + inv16pi2**2 * yt * g_t2
    dyb = inv16pi2 * yb * g_b1 + inv16pi2**2 * yb * g_b2
    dytau = inv16pi2 * ytau * g_tau1 + inv16pi2**2 * ytau * g_tau2

    # ---------- Higgs self-coupling lambda ----------------------------------
    dlam1 = (
        12.0 * lam**2 - (9.0 / 5.0 * g1**2 + 9.0 * g2**2) * lam
        + (27.0 / 100.0) * g1**4 + (9.0 / 10.0) * g1**2 * g2**2
        + (9.0 / 4.0) * g2**4
        + 4.0 * lam * (yt**2 + yb**2 + ytau**2)
        - 4.0 * (yt**4 + yb**4 + ytau**4)
    )
    # 2-loop lambda: dominant gauge^4 + Yukawa^4 + lambda*Yukawa pieces
    dlam2 = (
        - 78.0 * lam**3
        + 18.0 * lam**2 * (g1**2 + 3.0 * g2**2)
        - lam * ((629.0 / 60.0) * g1**4 - (39.0 / 10.0) * g1**2 * g2**2
                 + (73.0 / 4.0) * g2**4)
        - 24.0 * lam**2 * (yt**2 + yb**2 + ytau**2)
        + lam * (
            yt**2 * (17.0 / 4.0 * g1**2 + 45.0 / 4.0 * g2**2 + 40.0 * g3**2)
            + yb**2 * (5.0 / 4.0 * g1**2 + 45.0 / 4.0 * g2**2 + 40.0 * g3**2)
            + ytau**2 * (25.0 / 4.0 * g1**2 + 15.0 / 4.0 * g2**2)
            - (3.0 / 2.0) * (yt**4 + yb**4) - (3.0 / 2.0) * ytau**4
        )
        + 30.0 * (yt**6 + yb**6) + 10.0 * ytau**6
    )
    dlam = inv16pi2 * dlam1 + inv16pi2**2 * dlam2

    # ---------- Down-type Yukawa running (m_q tracked via y_q) --------------
    # We evolve ln y_q for q = d, s, b directly.  d ln y_q/dt = gamma_yq.
    # For d, s the Yukawas are too small to back-react on the rest of the SM;
    # we track them as massless-Yukawa logs.  For b we use the full g_b1,
    # g_b2 above.
    dlny_d = inv16pi2 * g_qlight1 + inv16pi2**2 * g_qlight2
    dlny_s = dlny_d  # identical light-Yukawa anomalous dimension
    dlny_b = inv16pi2 * g_b1 + inv16pi2**2 * g_b2

    return [dg1, dg2, dg3, dyt, dyb, dytau, dlam,
            dlny_d, dlny_s, dlny_b]


# =============================================================================
# Section 3: Boundary conditions at M_Z and SM solver wrapper
# =============================================================================


def boundary_at_MZ():
    """Return the SM state vector y(M_Z).  Couplings from PDG 2024;
    fermion Yukawas from MS-bar masses; lambda from the Higgs mass."""
    # Gauge couplings (GUT normalization for U(1)_Y)
    e_em = np.sqrt(4.0 * np.pi * ALPHA_EM_MZ)
    cos2_w = 1.0 - SIN2_THETAW
    g_prime = e_em / np.sqrt(cos2_w)
    g2_phys = e_em / np.sqrt(SIN2_THETAW)
    g1 = np.sqrt(5.0 / 3.0) * g_prime
    g2 = g2_phys
    g3 = np.sqrt(4.0 * np.pi * ALPHA_S_MZ)

    # Top Yukawa: m_t(M_Z) MS-bar from m_t(m_t) = 162.5 by 4-loop QCD
    # running (purely QCD; subleading Yukawa correction is sub-percent
    # for y_t and is dwarfed by the QCD running).
    mt_mz_GeV = qcd_running_mass(MT_MT * 1.0e3, MT_MT, MZ) * 1.0e-3  # MeV->GeV
    yt = np.sqrt(2.0) * mt_mz_GeV / VEV
    # Bottom Yukawa: m_b(M_Z) MS-bar
    mb_mz_GeV = qcd_running_mass(MB_INPUT, MB_MB, MZ) * 1.0e-3
    yb = np.sqrt(2.0) * mb_mz_GeV / VEV
    # Tau Yukawa: m_tau(M_Z) ~ m_tau(m_tau) (QED running tiny)
    ytau = np.sqrt(2.0) * MTAU / VEV

    # Higgs self-coupling at tree level: lambda = M_h^2 / (2 v^2)
    # (1-loop matching corrections are at the percent level and not
    # essential for the 5-sig-fig K_inv target.)
    lam = MH**2 / (2.0 * VEV**2)

    # Mass boundary conditions at M_Z (in MeV) -> down-type Yukawas
    md_mz = qcd_running_mass(MD_INPUT, MU_LIGHT, MZ)  # MeV
    ms_mz = qcd_running_mass(MS_INPUT, MU_LIGHT, MZ)  # MeV
    mb_mz = qcd_running_mass(MB_INPUT, MB_MB, MZ)     # MeV
    # Convert to Yukawas via y_q = sqrt(2) m_q / v.  v in GeV; m_q in MeV.
    yd = np.sqrt(2.0) * (md_mz * 1.0e-3) / VEV
    ys = np.sqrt(2.0) * (ms_mz * 1.0e-3) / VEV
    ybq = np.sqrt(2.0) * (mb_mz * 1.0e-3) / VEV

    return np.array([g1, g2, g3, yt, yb, ytau, lam,
                     np.log(yd), np.log(ys), np.log(ybq)])


def integrate_full_SM(mu_array):
    """Integrate the diagonal 2-loop SM RGE from M_Z to mu_max, sampling at
    the requested mu_array.  Returns a dict of arrays."""
    y0 = boundary_at_MZ()
    mu_eval = np.asarray(mu_array, dtype=float)
    # Sort, integrate, then map back
    order = np.argsort(mu_eval)
    mu_sorted = mu_eval[order]
    # Forward integration from M_Z up to max(mu) and backward to min(mu)
    t0 = np.log(MZ)
    tmin = float(np.log(mu_sorted.min()))
    tmax = float(np.log(mu_sorted.max()))

    # Stage 1: forward from t0 to tmax
    if tmax > t0 + 1e-12:
        t_forward = np.unique(np.concatenate(
            [[t0], np.log(mu_sorted[mu_sorted >= MZ]), [tmax]]))
        sol_fwd = solve_ivp(lambda t, y: sm_rhs(t, y, qcd_only=False),
                            [t0, tmax], y0,
                            method="RK45", rtol=1e-9, atol=1e-12,
                            dense_output=True, max_step=0.5)
    else:
        sol_fwd = None

    # Stage 2: backward from t0 to tmin
    if tmin < t0 - 1e-12:
        sol_bwd = solve_ivp(lambda t, y: sm_rhs(t, y, qcd_only=False),
                            [t0, tmin], y0,
                            method="RK45", rtol=1e-9, atol=1e-12,
                            dense_output=True, max_step=0.5)
    else:
        sol_bwd = None

    out = np.empty((len(y0), mu_eval.size))
    for i, mu_i in enumerate(mu_eval):
        ti = np.log(mu_i)
        if ti >= t0:
            if sol_fwd is None:
                out[:, i] = y0
            else:
                out[:, i] = sol_fwd.sol(ti)
        else:
            out[:, i] = sol_bwd.sol(ti)

    # Convert ln y_q back to running mass m_q in MeV (m_q = y_q * v / sqrt(2))
    # Only ratios m_b/m_q matter for K_inv; the absolute factor cancels
    # but we use a fixed v for stable numerics.
    yd = np.exp(out[7])
    ys = np.exp(out[8])
    ybq = np.exp(out[9])
    md_arr = yd * VEV / np.sqrt(2.0) * 1.0e3   # GeV->MeV
    ms_arr = ys * VEV / np.sqrt(2.0) * 1.0e3
    mb_arr = ybq * VEV / np.sqrt(2.0) * 1.0e3

    res = {
        "mu": mu_eval,
        "g1": out[0], "g2": out[1], "g3": out[2],
        "yt": out[3], "yb": out[4], "ytau": out[5],
        "lambda": out[6],
        "md": md_arr, "ms": ms_arr, "mb": mb_arr,
    }
    return res


# =============================================================================
# Section 4: Inverse-tuple Brannen fit and observables
# =============================================================================


def kinv_from_masses(md, ms, mb):
    inv_m = 1.0 / md + 1.0 / ms + 1.0 / mb
    inv_sqrt = 1.0 / np.sqrt(md) + 1.0 / np.sqrt(ms) + 1.0 / np.sqrt(mb)
    return inv_m / inv_sqrt**2


def k_from_masses(md, ms, mb):
    s = md + ms + mb
    sq = np.sqrt(md) + np.sqrt(ms) + np.sqrt(mb)
    return s / sq**2


def brannen_inv_fit(md, ms, mb):
    """Closed-form inverse-tuple Brannen fit (A_inv, T_inv, phi_inv)
    such that 1/sqrt(m_dn) = A_inv (1 + 2 T_inv cos(phi_inv/3 + 2 k pi/3))
    with k = 4 - n.  Returns (A, T, phi)."""
    inv_sqrt = np.array([1.0 / np.sqrt(md), 1.0 / np.sqrt(ms),
                         1.0 / np.sqrt(mb)])
    # Index reversal: k = 4 - n, so the array indexed by k=1,2,3 is
    # [1/sqrt(m_b), 1/sqrt(m_s), 1/sqrt(m_d)].
    inv_sqrt_k = np.array([inv_sqrt[2], inv_sqrt[1], inv_sqrt[0]])
    A = float(np.sum(inv_sqrt_k) / 3.0)
    s = inv_sqrt_k / A - 1.0
    T2 = float(np.sum(s ** 2) / 6.0)
    T = float(np.sqrt(T2))

    def loss(phi):
        third = phi / 3.0
        c = np.array([np.cos(third + 2.0 * np.pi / 3.0),
                      np.cos(third + 4.0 * np.pi / 3.0),
                      np.cos(third)])
        return float(np.sum((s - 2.0 * T * c) ** 2))

    res = minimize(lambda x: loss(float(x[0])), x0=[0.569],
                   method="L-BFGS-B", bounds=[(0.0, 2.0 * np.pi)])
    return A, T, float(res.x[0])


# =============================================================================
# Section 5: Scan / cache / plot
# =============================================================================


CACHE_PATH = "Script_5_cache.npz"


def kinv_qcd_only_grid(mu_grid):
    """K_inv(mu) under 4-loop QCD-only running for the regression test."""
    md_arr = np.array([qcd_running_mass(MD_INPUT, MU_LIGHT, mu)
                       for mu in mu_grid])
    ms_arr = np.array([qcd_running_mass(MS_INPUT, MU_LIGHT, mu)
                       for mu in mu_grid])
    mb_arr = np.array([qcd_running_mass(MB_INPUT, MB_MB, mu)
                       for mu in mu_grid])
    kinv = np.array([kinv_from_masses(md_arr[i], ms_arr[i], mb_arr[i])
                     for i in range(mu_grid.size)])
    kk = np.array([k_from_masses(md_arr[i], ms_arr[i], mb_arr[i])
                   for i in range(mu_grid.size)])
    return md_arr, ms_arr, mb_arr, kinv, kk


def compute_or_load(force=False):
    REQUIRED = ("mu_grid", "qcd_kinv", "sm_kinv", "sm_Tinv", "sm_phiinv",
                "sm_g1", "sm_g2", "sm_g3", "sm_yt", "sm_yb", "sm_lambda")
    if (not force) and os.path.exists(CACHE_PATH):
        try:
            d = dict(np.load(CACHE_PATH))
            if all(k in d for k in REQUIRED):
                return d
        except (OSError, ValueError):
            pass
    # Main grid: mu in [m_b, 1e16] GeV, 60 log-points
    mu_grid = np.logspace(np.log10(MB_MB), 16.0, 60)

    # QCD-only: 4-loop running for all mu
    qcd_md, qcd_ms, qcd_mb, qcd_kinv, qcd_k = kinv_qcd_only_grid(mu_grid)

    # Diagonal 2-loop SM solver
    sm = integrate_full_SM(mu_grid)
    sm_kinv = np.array([kinv_from_masses(sm["md"][i], sm["ms"][i],
                                          sm["mb"][i])
                        for i in range(mu_grid.size)])
    sm_k = np.array([k_from_masses(sm["md"][i], sm["ms"][i], sm["mb"][i])
                     for i in range(mu_grid.size)])
    # Inverse-tuple Brannen fit at each mu
    Tinv = np.empty(mu_grid.size)
    phiinv = np.empty(mu_grid.size)
    Ainv = np.empty(mu_grid.size)
    for i in range(mu_grid.size):
        a, T, phi = brannen_inv_fit(sm["md"][i], sm["ms"][i], sm["mb"][i])
        Ainv[i] = a
        Tinv[i] = T
        phiinv[i] = phi

    # QCD-only inverse-tuple
    qcd_Tinv = np.empty(mu_grid.size)
    qcd_phiinv = np.empty(mu_grid.size)
    qcd_Ainv = np.empty(mu_grid.size)
    for i in range(mu_grid.size):
        a, T, phi = brannen_inv_fit(qcd_md[i], qcd_ms[i], qcd_mb[i])
        qcd_Ainv[i] = a
        qcd_Tinv[i] = T
        qcd_phiinv[i] = phi

    out = {
        "mu_grid": mu_grid,
        "qcd_md": qcd_md, "qcd_ms": qcd_ms, "qcd_mb": qcd_mb,
        "qcd_kinv": qcd_kinv, "qcd_k": qcd_k,
        "qcd_Tinv": qcd_Tinv, "qcd_phiinv": qcd_phiinv,
        "qcd_Ainv": qcd_Ainv,
        "sm_g1": sm["g1"], "sm_g2": sm["g2"], "sm_g3": sm["g3"],
        "sm_yt": sm["yt"], "sm_yb": sm["yb"], "sm_ytau": sm["ytau"],
        "sm_lambda": sm["lambda"],
        "sm_md": sm["md"], "sm_ms": sm["ms"], "sm_mb": sm["mb"],
        "sm_kinv": sm_kinv, "sm_k": sm_k,
        "sm_Tinv": Tinv, "sm_phiinv": phiinv, "sm_Ainv": Ainv,
    }
    np.savez_compressed(CACHE_PATH, **out)
    return out


def make_plot(d, output_path="Figure_5.png"):
    mu_grid = d["mu_grid"]
    fig, axes = plt.subplots(1, 2, figsize=(12.0, 5.4),
                             constrained_layout=True)

    # Panel (a): K_inv(mu) - 2/3 (residuals to the symmetric reference);
    # using log scale on |residual| so the QCD-only flat line and the
    # diagonal 2-loop SM curve are both visible at their natural ~ 10^{-3} - 10^{-6}
    # scale.
    axA = axes[0]
    qcd_res = d["qcd_kinv"] - 2.0 / 3.0
    sm_res = d["sm_kinv"] - 2.0 / 3.0
    axA.plot(mu_grid, qcd_res * 1.0e3, color="#0072B2", lw=2.0, ls="-",
             label=r"QCD-only (4-loop $\overline{\mathrm{MS}}$)")
    axA.plot(mu_grid, sm_res * 1.0e3, color="#D55E00", lw=2.0, ls="--",
             label=r"diag.\,2-loop SM")
    axA.axhline(0.0, color="black", lw=0.8, ls=":",
                label=r"symmetric $K_{\mathrm{inv}}=2/3$")
    axA.set_xscale("log")
    axA.set_xlabel(r"$\mu$ [GeV]")
    axA.set_ylabel(r"$10^{3}\,(K_{\mathrm{inv}}(\mu)-\frac{2}{3})$")
    axA.set_title(r"(a) Residual $K_{\mathrm{inv}}(\mu)-\frac{2}{3}$:"
                  r" QCD-only vs diag.\,2-loop SM")
    axA.legend(loc="best", fontsize=9, framealpha=0.92)
    axA.grid(True, which="both", ls=":", alpha=0.3)

    # Inset: difference between diagonal 2-loop SM and QCD-only, on linear scale
    # in units of 10^{-6}; shows the few-ppm physical effect.
    from matplotlib.patches import Rectangle
    iax = axA.inset_axes([0.42, 0.10, 0.55, 0.35])
    iax.plot(mu_grid, (sm_res - qcd_res) * 1.0e6,
             color="#D55E00", lw=1.6, ls="--")
    iax.axhline(0.0, color="black", lw=0.6, ls=":")
    iax.set_xscale("log")
    iax.set_ylabel(r"$10^{6}\,\Delta_{\mathrm{SM-QCD}} K_{\mathrm{inv}}$",
                   fontsize=8)
    iax.set_xlabel(r"$\mu$ [GeV]", fontsize=8)
    iax.tick_params(axis="both", labelsize=7)
    iax.grid(True, which="both", ls=":", alpha=0.3)
    iax.set_title("inset: diag. 2-loop SM minus QCD-only", fontsize=8)

    # Panel (b): T_d_inv^fit(mu) under both prescriptions
    axB = axes[1]
    axB.plot(mu_grid, d["qcd_Tinv"], color="#0072B2", lw=2.0, ls="-",
             label=r"QCD-only")
    axB.plot(mu_grid, d["sm_Tinv"], color="#009E73", lw=2.0, ls="--",
             label=r"diag.\,2-loop SM")
    axB.axhline(1.0 / np.sqrt(2.0), color="black", lw=0.8, ls=":",
                label=r"symmetric $1/\sqrt{2}$")
    axB.set_xscale("log")
    axB.set_xlabel(r"$\mu$ [GeV]")
    axB.set_ylabel(r"$T_{d\,\mathrm{inv}}^{\mathrm{fit}}(\mu)$")
    axB.set_title(r"(b) Inverse-tuple shape parameter "
                  r"$T_{d\,\mathrm{inv}}^{\mathrm{fit}}(\mu)$")
    axB.legend(loc="best", fontsize=9, framealpha=0.92)
    axB.grid(True, which="both", ls=":", alpha=0.3)

    fig.savefig(output_path, dpi=200)
    plt.close(fig)
    print("Saved " + output_path)


# =============================================================================
# Main
# =============================================================================


def main():
    print("=" * 70)
    print("Script 5: Diagonal 2-loop SM diagnostic for the down-type spectrum")
    print("=" * 70)

    # 1. Sub-limit cross-check at mu = m_b (CRITICAL regression test)
    md_mb = qcd_running_mass(MD_INPUT, MU_LIGHT, MB_MB)
    ms_mb = qcd_running_mass(MS_INPUT, MU_LIGHT, MB_MB)
    mb_mb = qcd_running_mass(MB_INPUT, MB_MB, MB_MB)
    kinv_mb_qcd = kinv_from_masses(md_mb, ms_mb, mb_mb)
    print()
    print("[1] QCD-only regression at mu = m_b(m_b) = 4.183 GeV:")
    print("    m_d(m_b) = %.10f MeV" % md_mb)
    print("    m_s(m_b) = %.10f MeV" % ms_mb)
    print("    m_b(m_b) = %.10f MeV" % mb_mb)
    print("    K_inv(m_b) = %.10f" % kinv_mb_qcd)
    print("    expected QCD-only baseline value: 0.667416")
    print("    match to 6 dp: %s"
          % ("PASS" if abs(kinv_mb_qcd - 0.667416) < 5e-7 else "FAIL"))

    if abs(kinv_mb_qcd - 0.667416) >= 5e-7:
        raise RuntimeError(
            "QCD-only sub-limit regression FAILED: K_inv(m_b) = %.10f, "
            "expected 0.667416" % kinv_mb_qcd)

    # 2. Compute / load the full grid
    print()
    print("[2] Computing or loading diagonal 2-loop SM grid (cache: %s)" % CACHE_PATH)
    d = compute_or_load(force=False)

    # 3. Print K_inv at illustrative scales
    illust = [("m_b", MB_MB), ("M_Z", MZ), ("1 TeV", 1e3),
              ("10 TeV", 1e4), ("1 PeV", 1e6), ("10^16 GeV", 1e16)]
    print()
    print("[3] K_inv(mu) at illustrative scales:")
    print("    %-12s %16s %16s %20s" %
          ("scale", "K_inv(QCD-only)", "K_inv(diag SM)",
           "Delta = SM - QCD"))
    rows = []
    for label, mu_val in illust:
        # find nearest grid index
        idx = int(np.argmin(np.abs(d["mu_grid"] - mu_val)))
        row_qcd = float(d["qcd_kinv"][idx])
        row_sm = float(d["sm_kinv"][idx])
        rows.append((label, mu_val, row_qcd, row_sm))
        print("    %-12s %16.6f %16.6f %20.3e" %
              (label, row_qcd, row_sm, row_sm - row_qcd))

    # 4. Inverse-tuple at m_b (diag. 2-loop SM) and (T_inv - 1/sqrt(2))
    idx_mb = int(np.argmin(np.abs(d["mu_grid"] - MB_MB)))
    idx_pl = int(np.argmin(np.abs(d["mu_grid"] - 1e16)))
    print()
    print("[4] Inverse-tuple T_d_inv^fit residual to 1/sqrt(2):")
    print("    at mu = m_b:    T_inv (QCD-only) = %.6f, diag SM = %.6f"
          % (d["qcd_Tinv"][idx_mb], d["sm_Tinv"][idx_mb]))
    print("    at mu = 10^16:  T_inv (QCD-only) = %.6f, diag SM = %.6f"
          % (d["qcd_Tinv"][idx_pl], d["sm_Tinv"][idx_pl]))
    print("    1/sqrt(2)    = %.6f" % (1.0 / np.sqrt(2.0)))

    # 5. Couplings sanity check at mu = M_Z and mu = m_t
    print()
    print("[5] SM coupling sanity checks:")
    iMZ = int(np.argmin(np.abs(d["mu_grid"] - MZ)))
    print("    g_1(M_Z) = %.6f" % d["sm_g1"][iMZ])
    print("    g_2(M_Z) = %.6f" % d["sm_g2"][iMZ])
    print("    g_3(M_Z) = %.6f" % d["sm_g3"][iMZ])
    print("    y_t(M_Z) = %.6f" % d["sm_yt"][iMZ])
    print("    y_b(M_Z) = %.6f" % d["sm_yb"][iMZ])
    print("    lambda(M_Z) = %.6f" % d["sm_lambda"][iMZ])
    iPlanck = int(np.argmin(np.abs(d["mu_grid"] - 1e16)))
    print("    lambda(10^16 GeV) = %.6f (should be near zero / negative)"
          % d["sm_lambda"][iPlanck])

    # 6. Render the figure
    print()
    print("[6] Rendering Figure_5.png ...")
    make_plot(d, output_path="Figure_5.png")

    return d


if __name__ == "__main__":
    main()
