In [17]:
# bayes_glm_probit_random_intercept.py
# ------------------------------------------------------
# 二値: T2のGD(GD.Screening02) ~ T1諸指標 + 個体ランダム切片(プロビット)
# - 固定効果: Normal(0,1)(強めの縮約)
# - ランダム切片SD: HalfNormal(0.5)(非中心化)
# - 連続共変量(PHQ2As01, PHQ2Ds01)は標準化
# - サンプラ: BlackJAX or NumPyro(JAX/GPU) or CPU NUTS
# ------------------------------------------------------
import pandas as pd
import numpy as np
import pymc as pm
import pytensor.tensor as pt
import arviz as az
# ===== 0) サンプラの選択 =====
# いずれか1つを True に
use_blackjax = True
use_numpyro = False # True にすると NumPyro を使う
# 両方 False のときは CPU NUTS (pm.sample) で実行
# ===== 1) データの読み込み =====
# R 側で export した "df_clpm.csv" を想定
df = pd.read_csv("df_clpm.csv")
# ===== 2) 前処理(Rの定義に合わせる) =====
# 二値は 0/1 int に
for col in [
"GD.Screening01", "GD.Screening02", "ASRS.Positive",
"avgDailyGameTime01_bin", "avgDailyGameTime02_bin"
]:
df[col] = df[col].round().astype(int)
# 性別ダミー(女=1, 男=0)
df["female"] = (df["Gender02"] == 2).astype(int)
# 連続共変量は標準化(縮約と数値安定)
for zcol in ["PHQ2As01", "PHQ2Ds01"]:
df[zcol + "_z"] = (df[zcol] - df[zcol].mean()) / df[zcol].std(ddof=0)
# 目的変数:T2のGD(二値)
y = df["GD.Screening02"].values.astype(int)
# 説明変数(CLPMで使った最小セットに合わせる)
X_cols = [
"GD.Screening01", # 遅延従属
"avgDailyGameTime01_bin", # 長時間T1(二値)
"ASRS.Positive", # ADHD陽性
"PHQ2As01_z", # 不安(標準化)
"PHQ2Ds01_z", # 抑うつ(標準化)
"female" # 女性ダミー(1=女, 0=男)
]
X = df[X_cols].to_numpy(dtype=float)
K = X.shape[1]
# クラスター(個体)ID → 連番 index
ids, id_index = np.unique(df["MONITORSID"].values, return_inverse=True)
N_cluster = len(ids)
[Fixed effects / random intercept SD]
mean sd hdi_3% hdi_97%
intercept -2.314 0.231 -2.758 -1.959
beta[0] 1.165 0.285 0.631 1.701
beta[1] 0.245 0.311 -0.318 0.838
beta[2] 0.405 0.217 -0.004 0.804
beta[3] -0.076 0.099 -0.264 0.106
beta[4] 0.279 0.093 0.108 0.457
beta[5] -0.179 0.142 -0.445 0.083
sigma_re 0.336 0.250 0.000 0.780
[beta with names]
mean sd hdi_3% hdi_97%
beta[GD.Screening01] 1.165 0.285 0.631 1.701
beta[avgDailyGameTime01_bin] 0.245 0.311 -0.318 0.838
beta[ASRS.Positive] 0.405 0.217 -0.004 0.804
beta[PHQ2As01_z] -0.076 0.099 -0.264 0.106
beta[PHQ2Ds01_z] 0.279 0.093 0.108 0.457
beta[female] -0.179 0.142 -0.445 0.083
[Approx. AME] ΔPr(GD02=1 | glong01: 0→1) ≈ 0.0096
In [ ]:
# ===== 3) モデル(プロビット×ランダム切片:非中心化+強縮約) =====
with pm.Model() as model:
# 固定効果:強縮約
beta = pm.Normal("beta", mu=0.0, sigma=1.0, shape=K)
intercept = pm.Normal("intercept", mu=0.0, sigma=1.0)
# ランダム切片のSD:HalfNormal(0.5)
sigma_re = pm.HalfNormal("sigma_re", sigma=0.5)
# 非中心化乱数効果: z ~ N(0,1), re = z * sigma_re
z_re = pm.Normal("z_re", mu=0.0, sigma=1.0, shape=N_cluster)
re = pm.Deterministic("re", z_re * sigma_re)
# 線形予測子
# eta = intercept + X @ beta + re[id]
eta = intercept + pt.dot(X, beta) + re[id_index]
# プロビットリンク:p = Phi(eta)
p = pm.math.invprobit(eta)
# 尤度(Bernoulli)
y_obs = pm.Bernoulli("y_obs", p=p, observed=y)
In [ ]:
# ===== 4) サンプリング(JAX/GPU か CPU)=====
if use_blackjax:
# BlackJAX: 'parallel' or 'vectorized' のみ。progressbar=False で I/O 衝突回避。
idata = pm.sampling.jax.sample_jax_nuts(
draws=2000,
tune=2000,
chains=4,
random_seed=42,
target_accept=0.998,
chain_method="vectorized",
progressbar=False,
nuts_sampler="blackjax"
)
elif use_numpyro:
idata = pm.sampling.jax.sample_jax_nuts(
draws=2000,
tune=2000,
chains=4,
random_seed=42,
target_accept=0.998,
chain_method="vectorized",
progressbar=False,
nuts_sampler="numpyro"
)
else:
# CPU NUTS(JAXを使わない)
idata = pm.sample(
draws=2000,
tune=2000,
chains=4,
random_seed=42,
target_accept=0.998,
cores=4
)
In [ ]:
# --- 5.2 収束診断(個別出力:任意) ---
print("\n=== Convergence diagnostics ===")
rhat = az.rhat(idata, var_names=["intercept","beta","sigma_re"])
ess_b = az.ess(idata, var_names=["intercept","beta","sigma_re"], method="bulk")
ess_t = az.ess(idata, var_names=["intercept","beta","sigma_re"], method="tail")
print("R-hat:\n", rhat)
print("\nESS (bulk):\n", ess_b)
print("\nESS (tail):\n", ess_t)
# Divergence と BFMI(ある場合)
if "diverging" in idata.sample_stats:
div_sum = int(np.array(idata.sample_stats["diverging"]).sum())
print(f"\nDivergences: {div_sum}")
else:
print("\nDivergences: <not recorded>")
try:
bfmi = az.bfmi(idata)
print("BFMI (mean):", float(np.nanmean(bfmi)))
except Exception:
print("BFMI: <not available>")
In [18]:
# ===== 5) 結果の要約 =====
import arviz as az
import pandas as pd
import numpy as np
# --- 5.1 主要パラメータの要約(R-hat / ESS 含む) ---
summ = az.summary(
idata,
var_names=["intercept", "beta", "sigma_re"],
round_to=3
)
print("=== Summary (with R-hat / ESS) ===")
print(summ)
=== Summary (with R-hat / ESS) ===
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk \
intercept -2.314 0.231 -2.758 -1.959 0.007 0.007 1274.137
beta[0] 1.165 0.285 0.631 1.701 0.004 0.004 6797.541
beta[1] 0.245 0.311 -0.318 0.838 0.002 0.004 15562.006
beta[2] 0.405 0.217 -0.004 0.804 0.002 0.003 10491.638
beta[3] -0.076 0.099 -0.264 0.106 0.001 0.001 8751.640
beta[4] 0.279 0.093 0.108 0.457 0.001 0.001 5116.411
beta[5] -0.179 0.142 -0.445 0.083 0.001 0.002 9850.549
sigma_re 0.336 0.250 0.000 0.780 0.008 0.005 943.634
ess_tail r_hat
intercept 1804.431 1.003
beta[0] 4313.639 1.000
beta[1] 5670.697 1.000
beta[2] 5390.486 1.001
beta[3] 5763.602 1.000
beta[4] 4952.310 1.001
beta[5] 4850.402 1.001
sigma_re 1750.422 1.004
In [19]:
# --- 5.3 beta の列名を人名に優しい名前に置換(見やすさ用) ---
# *あなたの X の順序に合わせて修正してください
xnames = [
"GD.Screening01", # 遅延従属
"avgDailyGameTime01_bin", # 長時間T1(二値)
"ASRS.Positive", # ADHD陽性
"PHQ2As01_z", # 不安(標準化)
"PHQ2Ds01_z", # 抑うつ(標準化)
"female" # 女性ダミー
]
# beta のサマリーだけ抜き出して名前を振る
beta_summ = az.summary(idata, var_names=["beta"], round_to=3)
beta_summ = beta_summ.copy()
# index: beta[0], beta[1], ... を xnames に差し替え
def _rename_beta_idx(idx: str) -> str:
# idx は "beta[i]" 形式
i = int(idx.split("[")[-1].split("]")[0]) - 1 # ArviZは1始まりのことが多いので -1
return f"beta[{xnames[i]}]" if 0 <= i < len(xnames) else idx
beta_summ.index = [ _rename_beta_idx(ix) for ix in beta_summ.index ]
print("\n=== Beta (named) ===")
print(beta_summ[["mean","sd","hdi_3%","hdi_97%","r_hat","ess_bulk","ess_tail"]])
=== Beta (named) ===
mean sd hdi_3% hdi_97% r_hat ess_bulk \
beta[0] 1.165 0.285 0.631 1.701 1.000 6797.541
beta[GD.Screening01] 0.245 0.311 -0.318 0.838 1.000 15562.006
beta[avgDailyGameTime01_bin] 0.405 0.217 -0.004 0.804 1.001 10491.638
beta[ASRS.Positive] -0.076 0.099 -0.264 0.106 1.000 8751.640
beta[PHQ2As01_z] 0.279 0.093 0.108 0.457 1.001 5116.411
beta[PHQ2Ds01_z] -0.179 0.142 -0.445 0.083 1.001 9850.549
ess_tail
beta[0] 4313.639
beta[GD.Screening01] 5670.697
beta[avgDailyGameTime01_bin] 5390.486
beta[ASRS.Positive] 5763.602
beta[PHQ2As01_z] 4952.310
beta[PHQ2Ds01_z] 4850.402
In [20]:
# --- 5.4(任意)平均限界効果の簡易評価:glong01(0→1)の影響 ---
# * プロビット係数を確率差に直に変換するには厳密には数値積分が必要。
# * ここでは CLPM と整合する「他要因は平均・REは平均(0)に固定」したときの
# 予測確率差を近似的に評価。
try:
import scipy.stats as st
# 事後サンプル取り出し
post = idata.posterior
# PyMCの事後は dim: (chain, draw, param)
beta_draws = np.array(post["beta"]).reshape(-1, len(xnames))
intercept_draws = np.array(post["intercept"]).reshape(-1)
# ランダム切片の平均は0(非中心化・平均0想定)とする
# 説明変数の代表点(平均値近傍)を0に固定、glong01だけを 0→1 に変化
x0 = np.zeros(len(xnames))
idx_glong = xnames.index("avgDailyGameTime01_bin")
# 予測確率 p = Φ(eta)
def prob_from_linpred(a, b, x):
eta = a + (b * x).sum(-1)
return st.norm.cdf(eta)
p0 = prob_from_linpred(intercept_draws, beta_draws, x0)
x1 = x0.copy(); x1[idx_glong] = 1.0
p1 = prob_from_linpred(intercept_draws, beta_draws, x1)
dP = p1 - p0
dP_mean = float(dP.mean())
hdi = az.hdi(dP, hdi_prob=0.94) # 94% HDI(任意)
print(f"\n[Approx. AME] ΔPr(GD02=1 | glong01: 0→1) ≈ {dP_mean:.4f} (94% HDI [{hdi[0]:.4f}, {hdi[1]:.4f}])")
except Exception as e:
print("\nAME calc skipped:", e)
[Approx. AME] ΔPr(GD02=1 | glong01: 0→1) ≈ 0.0137 (94% HDI [-0.0126, 0.0476])
In [21]:
# --- 5.5(任意)トレース等の可視化 ---
az.plot_trace(idata, var_names=["intercept","beta","sigma_re"]);
az.plot_rank(idata, var_names=["intercept","beta","sigma_re"]);
az.plot_energy(idata);
In [23]:
# --- 6 結果の書き出し ---
import pandas as pd
import numpy as np
import arviz as az
# 1) 全体サマリ(R-hat / ESS 含む)
summ = az.summary(idata, var_names=["intercept","beta","sigma_re"], round_to=3)
summ.to_csv("summary_fixed_sigma.csv", index=True)
# 2) beta の見やすいラベル版
xnames = [
"GD.Screening01",
"avgDailyGameTime01_bin",
"ASRS.Positive",
"PHQ2As01_z",
"PHQ2Ds01_z",
"female",
]
beta_summ = az.summary(idata, var_names=["beta"], round_to=3).copy()
def _rename_beta_idx(ix: str) -> str:
# "beta[0]" のような名前を人間可読に
i = int(ix.split("[")[-1].split("]")[0]) - 1
return f"beta[{xnames[i]}]" if 0 <= i < len(xnames) else ix
beta_summ.index = [_rename_beta_idx(ix) for ix in beta_summ.index]
beta_summ = beta_summ[["mean","sd","hdi_3%","hdi_97%","r_hat","ess_bulk","ess_tail"]]
beta_summ.to_csv("summary_beta_named.csv", index=True)
# 3) 近似AME(glong01: 0→1)の事後分布も保存
import scipy.stats as st
post = idata.posterior
b = np.array(post["beta"]).reshape(-1, len(xnames))
a = np.array(post["intercept"]).reshape(-1)
ix_g = xnames.index("avgDailyGameTime01_bin")
x0 = np.zeros(len(xnames))
p0 = st.norm.cdf(a + (b @ x0))
x1 = x0.copy(); x1[ix_g] = 1.0
p1 = st.norm.cdf(a + (b @ x1))
dP = p1 - p0
pd.DataFrame({"dP_glong01_0to1": dP}).to_csv("ame_glong01_draws.csv", index=False)
print("書き出し完了:summary_fixed_sigma.csv, summary_beta_named.csv, ame_glong01_draws.csv")
書き出し完了:summary_fixed_sigma.csv, summary_beta_named.csv, ame_glong01_draws.csv
In [ ]: