import os
import pandas as pd

# Long-format schemas written to the results file.
_SCOPE_RL_COLUMNS = ['iteration', 'policy', 'estimator', 'metric', 'result', 'percentage_change']
_OBP_COLUMNS = ['iteration', 'metric', 'estimator', 'result', 'percentage change']


def _wide_to_long(wide):
    """Convert an ``estimator,metric1,metric2,...`` frame to the OBP long format.

    Rows are emitted estimator by estimator (row-major); missing metric
    values are skipped. ``iteration`` and ``percentage change`` are
    placeholders that the caller overwrites.
    """
    metric_columns = [col for col in wide.columns if col != 'estimator']
    records = []
    for _, row in wide.iterrows():
        estimator_name = row['estimator']
        for col in metric_columns:
            if pd.notna(row[col]):
                records.append({
                    'iteration': 0,
                    'metric': col,
                    'estimator': estimator_name,
                    'result': row[col],
                    'percentage change': 0,
                })
    # Passing ``columns`` keeps the schema even when no rows were produced.
    return pd.DataFrame(records, columns=list(_OBP_COLUMNS))


def _percentage_change(current, previous):
    """Return the percent change from ``previous`` to ``current``.

    Returns 0.0 when either value is missing or non-numeric, or when the
    previous value is zero (the change is undefined in that case).
    """
    if pd.isna(current) or pd.isna(previous):
        return 0.0
    try:
        current_val = float(current)
        prev_val = float(previous)
    except (ValueError, TypeError):
        return 0.0
    if prev_val == 0:
        return 0.0
    return ((current_val - prev_val) / prev_val) * 100


def _percentage_changes(current, existing, key_columns, previous_iteration):
    """Compute one percentage change per row of ``current``.

    Each row is compared with the first row of ``existing`` that belongs to
    ``previous_iteration`` and has the same values in ``key_columns``. Rows
    without a match get 0.0. If either frame lacks a key column or the
    ``result`` column, no comparison is possible and every row gets 0.0.
    """
    required = [*key_columns, 'result']
    if any(col not in current.columns or col not in existing.columns for col in required):
        return [0.0] * len(current)

    # Index the previous iteration once instead of re-filtering per row.
    previous = existing[existing['iteration'] == previous_iteration]
    baseline = {}
    previous_keys = previous[key_columns].itertuples(index=False, name=None)
    for key, value in zip(previous_keys, previous['result']):
        # A missing key part never compares equal to anything, so skip it.
        if not any(pd.isna(part) for part in key):
            baseline.setdefault(key, value)  # keep the first match only

    current_keys = current[key_columns].itertuples(index=False, name=None)
    return [
        _percentage_change(value, baseline.get(key))
        for key, value in zip(current_keys, current['result'])
    ]


def save_results(filepath: str, results: str) -> None:
    """Append the output of one notebook run to the accumulated results CSV.

    The notebook output is read from ``filepath``, tagged with the next
    iteration number, given a percentage change relative to the previous
    iteration, appended to ``results`` and then ``filepath`` is deleted.

    Three input layouts are recognised from the column names:

    * ``policy`` and ``iteration`` present: SCOPE-RL long format
      (percentage column ``percentage_change``).
    * ``iteration`` present without ``policy``: OBP long format
      (percentage column ``percentage change``).
    * ``estimator`` present without ``iteration``: wide format, converted to
      the OBP long format.

    Any other layout is written as-is using the OBP column names, with a
    percentage change of 0.

    Args:
        filepath: Path to the CSV file produced by the notebook run. If it
            is missing or unreadable a message is printed and nothing is
            saved.
        results: Path to the CSV file that accumulates results across
            iterations. Created if missing; an empty file is treated as
            having no previous results.
    """
    # Check if the output file exists before trying to read it
    if not os.path.exists(filepath):
        print(f"Warning: Output file {filepath} not found. This may be due to:")
        print("1. The notebook didn't generate expected CSV output")
        print("2. The CSV export injection failed")
        print("3. The notebook completed successfully but without generating results")
        print("Skipping results saving for this iteration.")
        return

    try:
        notebook_output = pd.read_csv(filepath)
    except Exception as e:  # deliberate best-effort: any read failure skips this run
        print(f"Error reading output file {filepath}: {e}")
        print("Skipping results saving for this iteration.")
        return

    # Detect CSV format
    has_policy_column = 'policy' in notebook_output.columns
    has_iteration_column = 'iteration' in notebook_output.columns
    has_estimator_column = 'estimator' in notebook_output.columns

    if has_policy_column and has_iteration_column:
        expected_columns = list(_SCOPE_RL_COLUMNS)
        format_type = "SCOPE-RL"
    elif has_iteration_column:
        expected_columns = list(_OBP_COLUMNS)
        format_type = "OBP"
    elif has_estimator_column:
        print("Detected wide format CSV (from notebook), converting to long format...")
        format_type = "WIDE_OBP"
        notebook_output = _wide_to_long(notebook_output)
        expected_columns = list(_OBP_COLUMNS)
        print(f"Converted wide format to long format: {len(notebook_output)} rows")
    else:
        print(f"Warning: Unexpected CSV format. Columns: {list(notebook_output.columns)}")
        print("Attempting to proceed with available data...")
        # Fall back to the OBP column names
        expected_columns = list(_OBP_COLUMNS)
        format_type = "UNKNOWN"

    print(f"Detected {format_type} format CSV")

    # Re-evaluate after a possible conversion: the converted frame is always
    # OBP-shaped, whatever columns the wide input had.
    has_policy_column = 'policy' in notebook_output.columns
    if has_policy_column:
        key_columns = ['policy', 'estimator', 'metric']
        percentage_col = 'percentage_change'
    else:
        key_columns = ['estimator', 'metric']
        percentage_col = 'percentage change'

    # Load existing results or start from an empty frame
    df_existing = None
    if os.path.exists(results):
        try:
            df_existing = pd.read_csv(results)
        except pd.errors.EmptyDataError:
            # A zero-byte results file carries no history.
            df_existing = None
    if df_existing is None:
        df_existing = pd.DataFrame(columns=expected_columns)

    iteration = df_existing['iteration'].max() + 1 if len(df_existing) > 0 else 0

    # Update iteration for current results
    notebook_output['iteration'] = iteration

    if len(df_existing) > 0 and iteration > 0:
        # Assign the whole column at once: writing floats cell by cell into
        # an integer column is a deprecated upcast in recent pandas.
        notebook_output[percentage_col] = _percentage_changes(
            notebook_output, df_existing, key_columns, iteration - 1
        )
    else:
        # First iteration - all percentage changes are 0
        notebook_output[percentage_col] = 0

    # Append to existing results
    if len(df_existing) > 0:
        df_combined = pd.concat([df_existing, notebook_output], ignore_index=True)
    else:
        # Nothing to append to: keep the established column order without
        # concatenating an empty frame (deprecated in recent pandas).
        extra_columns = [col for col in notebook_output.columns if col not in df_existing.columns]
        df_combined = notebook_output.reindex(columns=list(df_existing.columns) + extra_columns)
    df_combined.to_csv(results, index=False)

    # The notebook output has been consumed; it may already be gone.
    try:
        os.remove(filepath)
    except FileNotFoundError:
        pass