# Compare data analysis with cyclical approach vs without cyclical approach
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Data preparation. `df` must already be in scope; it is not defined in this
# part of the script.
# Cyclical view: one row per treatment_cycle holding the mean, standard
# deviation and row count of treatment_success_rate within that cycle.
success_by_cycle = df.groupby('treatment_cycle')['treatment_success_rate']
cycle_summary = success_by_cycle.agg(['mean', 'std', 'count']).reset_index()

# Non-cyclical view: the ungrouped treatment_success_rate column itself (not
# an aggregate, despite the name), kept whole so its distribution can be drawn.
overall_summary = df['treatment_success_rate']

# One figure, two side-by-side panels: per-cycle averages on the left and the
# overall distribution on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))
cycle_ax, overall_ax = axes

# Left panel (cyclical approach): one bar per treatment cycle, bar height is
# the mean success rate already computed in cycle_summary.
# NOTE(review): recent seaborn releases reportedly warn when `palette` is
# given without `hue` -- confirm against the installed version.
sns.barplot(
    data=cycle_summary,
    x='treatment_cycle',
    y='mean',
    palette='Set2',
    capsize=0.1,
    ax=cycle_ax,
)
cycle_ax.set(
    title='Cyclical Approach: Avg Treatment Success Rate per Cycle',
    xlabel='Treatment Cycle',
    ylabel='Average Treatment Success Rate',
)

# Overlay the per-cycle standard deviation as black error bars. The x values
# are 0..n-1, which assumes seaborn drew the bars at those integer positions
# in cycle_summary row order -- TODO confirm.
bar_positions = range(len(cycle_summary))
cycle_ax.errorbar(
    x=bar_positions,
    y=cycle_summary['mean'],
    yerr=cycle_summary['std'],
    fmt='none',
    c='black',
    capsize=5,
)

# Right panel (non-cyclical approach): 10-bin histogram of every success rate
# with a KDE curve drawn on top.
sns.histplot(data=overall_summary, bins=10, kde=True, color='skyblue', ax=overall_ax)
overall_ax.set(
    title='Non-Cyclical Approach: Overall Treatment Success Rate Distribution',
    xlabel='Treatment Success Rate',
    ylabel='Frequency',
)

# Tighten the layout before rendering.
plt.tight_layout()
plt.show()

# Print the per-cycle table and the describe() statistics of the ungrouped
# success rates for reference.
print('Cycle Summary:')
print(cycle_summary)
# The leading '\n' emits a blank line between the two tables. It must be
# written as an escape sequence: a backslash followed by a physical line break
# inside a quoted string is a line continuation and produces no output.
print('\nOverall Treatment Success Rate Summary:')
print(overall_summary.describe())