"""CSV utilities for two-agent framework.

Handles CSV parsing and saving for different framework formats.
"""
import os
import shutil
from pathlib import Path
from typing import Dict, Any, Optional


# Single-pass character mapping used to turn "<estimator>_<metric>" labels into
# flat dictionary keys: spaces and dots become underscores, parentheses are
# dropped and '%' is spelled out as 'pct'.
_METRIC_KEY_TRANSLATION = str.maketrans({
    ' ': '_',
    '(': None,
    ')': None,
    '%': 'pct',
    '.': '_',
})


def _flatten_metric_key(estimator: Any, metric: Any) -> str:
    """Return the normalised, lower-case ``<estimator>_<metric>`` key."""
    return f"{estimator}_{metric}".translate(_METRIC_KEY_TRANSLATION).lower()


def parse_scope_rl_csv(csv_path: str, iteration: int) -> Dict[str, Any]:
    """Parse a results CSV and return structured data.

    Two layouts are recognised:

    * long format - the CSV has ``estimator``, ``metric`` and ``result``
      columns (any further columns are kept in the returned DataFrame);
    * wide format - the CSV has an ``estimator`` column and every other
      column is treated as a metric; non-null cells are converted to
      long-format rows internally.

    Args:
        csv_path: Path to the out.csv file
        iteration: Current iteration number

    Returns:
        On success, a dictionary with the keys ``iteration``,
        ``csv_dataframe`` (long-format DataFrame), ``format_type``
        (``'SCOPE_RL'`` or ``'OBP_CONVERTED'``), ``available_metrics``,
        ``available_estimators`` and ``total_combinations``, plus one
        flattened ``<estimator>_<metric>`` entry per row (later rows
        overwrite earlier ones that normalise to the same key).
        On failure, ``{'iteration', 'status': 'parsing_failed', 'error'}``.
        This function does not raise; every error is reported that way.
    """
    try:
        # Imported lazily so a missing pandas is reported as a parse failure.
        import pandas as pd

        df = pd.read_csv(csv_path)

        # Check if we have SCOPE-RL long-format: iteration,policy,estimator,metric,result,percentage_change
        if all(col in df.columns for col in ('estimator', 'metric', 'result')):
            format_type = 'SCOPE_RL'
            long_df = df.copy()  # Full DataFrame for comprehensive analysis

        # Handle wide-format as fallback (OBP-style)
        elif 'estimator' in df.columns:
            print("Detected wide-format CSV, converting to long-format structure...")
            format_type = 'OBP_CONVERTED'
            long_format_data = [
                {'estimator': row['estimator'], 'metric': col, 'result': value}
                for _, row in df.iterrows()
                for col, value in row.items()
                if col != 'estimator' and pd.notna(value)
            ]
            # Explicit columns keep the frame well-formed even when the CSV
            # has no usable rows; otherwise the 'metric' lookup below would
            # raise KeyError on a column-less DataFrame.
            long_df = pd.DataFrame(
                long_format_data, columns=['estimator', 'metric', 'result']
            )

        else:
            print(f"Warning: CSV format not recognised in {csv_path}, columns: {df.columns.tolist()}")
            return {'iteration': iteration, 'status': 'parsing_failed', 'error': 'Unrecognised CSV format'}

        result_data = {
            'iteration': iteration,
            'csv_dataframe': long_df,
            'format_type': format_type,
            'available_metrics': long_df['metric'].unique().tolist(),
            'available_estimators': long_df['estimator'].unique().tolist(),
            'total_combinations': len(long_df)
        }

        # Also create flattened metrics for backward compatibility
        for _, row in long_df.iterrows():
            result_data[_flatten_metric_key(row['estimator'], row['metric'])] = row['result']

        if format_type == 'SCOPE_RL':
            print(f"Parsed SCOPE-RL CSV: {len(df)} combinations, metrics: {result_data['available_metrics']}")
        return result_data

    except Exception as e:
        # Deliberate catch-all: callers inspect 'status' instead of handling
        # exceptions from file access or pandas.
        print(f"Error parsing CSV {csv_path}: {str(e)}")
        return {'iteration': iteration, 'status': 'parsing_failed', 'error': str(e)}


def _results_file_path(results_dir, iteration: int, suffix: str = "") -> Path:
    """Return ``<results_dir>/baseline_results*.csv`` or ``iteration_<n>_results*.csv``."""
    stem = "baseline_results" if iteration == 0 else f"iteration_{iteration}_results"
    return Path(results_dir) / f"{stem}{suffix}.csv"


def _copy_original_csv(csv_path: str, iteration: int, results_dir) -> Path:
    """Copy the untouched source CSV to its ``*_results_original.csv`` fallback path."""
    fallback_path = _results_file_path(results_dir, iteration, "_original")
    fallback_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(csv_path, fallback_path)
    return fallback_path


def save_csv_results(csv_path: str, iteration: int, results_dir: Path) -> Optional[str]:
    """Save CSV results in the correct format for the two-agent framework.

    The source CSV is normalised to a long format before saving:

    * ``estimator``/``metric``/``result`` plus ``policy`` columns - saved as
      SCOPE-RL long format (iteration, policy, estimator, metric, result);
    * ``estimator``/``metric``/``result`` without ``policy`` - saved as OBP
      long format (iteration, metric, estimator, result);
    * only an ``estimator`` column - treated as wide format; every other
      column becomes a metric row with ``percentage_change`` set to 0.

    In the long formats the ``iteration`` column is overwritten (or added
    when absent) with ``iteration``, and ``percentage_change`` is kept when
    present. Unrecognised layouts, and any conversion failure, fall back to
    copying the original file to ``*_results_original.csv``.

    Args:
        csv_path: Path to the out.csv file
        iteration: Iteration number for naming (0 is saved as the baseline)
        results_dir: Directory to save results; created if it does not exist

    Returns:
        Path to the saved CSV file, or None if failed
    """
    try:
        if not os.path.exists(csv_path):
            print(f"Warning: CSV file {csv_path} not found")
            return None

        import pandas as pd

        # Read the notebook's CSV format
        df = pd.read_csv(csv_path)
        columns = set(df.columns)

        # Detect the format based on columns. Long format is keyed on
        # estimator/metric/result only: a long CSV lacking an 'iteration'
        # column must not be mistaken for wide format and melted again.
        if {'estimator', 'metric', 'result'} <= columns:
            if 'policy' in columns:
                # SCOPE-RL format: iteration,policy,estimator,metric,result,percentage_change
                print("Detected SCOPE-RL long format - using directly")
                expected_columns = ['iteration', 'policy', 'estimator', 'metric', 'result']
            else:
                # OBP format: iteration,metric,estimator,result,percentage_change (no policy)
                print("Detected OBP long format - using directly")
                expected_columns = ['iteration', 'metric', 'estimator', 'result']

            # Update (or add) iteration numbers to match our iteration
            df['iteration'] = iteration

            if 'percentage_change' in columns:
                expected_columns.append('percentage_change')

            result_df = df[expected_columns]

        elif 'estimator' in columns:
            # Wide format (OBP-style): estimator,metric1,metric2,metric3,...
            print("Detected wide format - converting to OBP long format")
            rows = [
                {
                    'iteration': iteration,
                    'metric': col,
                    'estimator': row['estimator'],
                    'result': value,
                    'percentage_change': 0
                }
                for _, row in df.iterrows()
                for col, value in row.items()
                if col != 'estimator' and pd.notna(value)
            ]

            # Explicit columns fix the OBP column order and keep the frame
            # valid (header only) when there are no non-null metric cells.
            result_df = pd.DataFrame(
                rows,
                columns=['iteration', 'metric', 'estimator', 'result', 'percentage_change']
            )

        else:
            print(f"Warning: CSV format not recognised, columns: {df.columns.tolist()}")
            # Fallback: save original CSV with different name
            fallback_path = _copy_original_csv(csv_path, iteration, results_dir)
            print(f"Saved original CSV format to: {fallback_path.name}")
            return str(fallback_path)

        # Create destination path (and its directory, if needed)
        dest_path = _results_file_path(results_dir, iteration)
        dest_path.parent.mkdir(parents=True, exist_ok=True)

        # Save the converted CSV
        result_df.to_csv(dest_path, index=False)

        print(f"CSV results converted and saved to: {dest_path.name}")
        return str(dest_path)

    except Exception as e:
        print(f"Error in save_csv_results: {e}")
        # Fallback: save original CSV even if conversion fails
        try:
            fallback_path = _copy_original_csv(csv_path, iteration, results_dir)
            print(f"Conversion failed, saved original format to: {fallback_path.name}")
            return str(fallback_path)
        except Exception as fallback_error:
            print(f"Both conversion and fallback failed: {fallback_error}")
            return None