#!/usr/bin/env python3
"""
Eolisa Space - Data Download and Preprocessing Script
======================================================
Author: Eolisa Space Research Division
Contact: sentinelalpha@eolisaspace.com

This script downloads and preprocesses EHT and GRAVITY data for analysis.

Usage:
    python download_data.py --output-dir ./data

Data Sources:
    - EHT Sgr A* 2017 observations (public release)
    - GRAVITY flare observations (where publicly available)
    - Simulated GRMHD snapshots for testing
"""

import argparse
import os
import sys
from pathlib import Path
import urllib.request
import hashlib
import json


class DataDownloader:
    """Download and verify scientific data files.

    Files are fetched into ``output_dir``. Each download is first written to
    a ``<name>.part`` sibling and only renamed to its final name once the
    transfer finished, so an interrupted run never leaves a truncated file
    that a later run would mistake for a complete one.
    """

    # Public data sources (examples - update with actual URLs).
    # A dataset with a 'files' list is fetched as '<url>/<file>' per entry and
    # its 'md5' is a {filename: hash} mapping; a dataset without 'files' is a
    # single file at 'url' and its 'md5' is a single hash string.
    DATA_SOURCES = {
        'eht_sgra_2017': {
            'url': 'https://eventhorizontelescope.org/data/sgra-2017',
            'description': 'EHT Sgr A* 2017 imaging data',
            'files': [
                'sgra_hops_3601_SGRA_LO_netcal_LMTcal_10s_ALMArot_dcal.uvfits',
                'sgra_hops_3601_SGRA_HI_netcal_LMTcal_10s_ALMArot_dcal.uvfits'
            ],
            'md5': {}  # Add checksums
        },
        'synthetic_grmhd': {
            'url': 'https://zenodo.org/records/16511064/files/synthetic_grmhd_snapshots.h5',
            'description': 'Validation GRMHD snapshots for testing',
            'md5': 'PLACEHOLDER_MD5_HASH'
        }
    }

    def __init__(self, output_dir: str = './data'):
        """
        Initialize downloader.

        Parameters
        ----------
        output_dir : str
            Directory to save downloaded files. It is created (including
            missing parents) if it does not exist yet.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def download_file(self, url: str, filename: str) -> "Path | None":
        """
        Download a single file with progress indicator.

        An already existing ``filename`` in the output directory is reused
        without contacting the server.

        Parameters
        ----------
        url : str
            URL to download from
        filename : str
            Local filename, relative to the output directory

        Returns
        -------
        filepath : Path or None
            Path to the downloaded (or already present) file, or ``None``
            when the download failed.
        """
        filepath = self.output_dir / filename

        if filepath.exists():
            print(f"   ✓ File already exists: {filename}")
            return filepath

        print(f"   Downloading: {filename}")

        def report_progress(block_num, block_size, total_size):
            downloaded = block_num * block_size
            if total_size > 0:
                percent = min(100, downloaded * 100 / total_size)
                sys.stdout.write(f"\r   Progress: {percent:.1f}%")
            else:
                # urlretrieve reports -1 when the size is unknown and 0 for
                # an empty file; a percentage is meaningless in both cases.
                sys.stdout.write(f"\r   Progress: {downloaded} bytes")
            sys.stdout.flush()

        # Download next to the target and rename on success only.
        partial = filepath.with_name(filepath.name + '.part')
        try:
            urllib.request.urlretrieve(url, partial, reporthook=report_progress)
            partial.replace(filepath)
        except Exception as e:
            # Deliberately broad: this is a best-effort boundary and urllib
            # can surface OSError, ValueError and http.client errors alike.
            print(f"\n   ✗ Download failed: {e}")
            return None
        finally:
            # Also runs on KeyboardInterrupt, so no partial file is left over.
            if partial.exists():
                partial.unlink()

        print()  # New line after progress
        print(f"   ✓ Downloaded: {filename}")
        return filepath

    def verify_checksum(self, filepath: Path, expected_md5: str) -> bool:
        """
        Verify file integrity using MD5 checksum.

        MD5 is used here as a transfer-integrity check only, not as a
        security measure.

        Parameters
        ----------
        filepath : Path
            Path to file
        expected_md5 : str
            Expected MD5 hash as a hex string (compared case-insensitively).
            An empty value or ``'PLACEHOLDER_MD5_HASH'`` skips verification.

        Returns
        -------
        valid : bool
            True if checksum matches or no checksum is available
        """
        if not expected_md5 or expected_md5 == 'PLACEHOLDER_MD5_HASH':
            print("   ⚠ No checksum available for verification")
            return True

        print("   Verifying checksum...")

        md5_hash = hashlib.md5()
        with open(filepath, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                md5_hash.update(chunk)

        computed_md5 = md5_hash.hexdigest()

        # hexdigest() is lowercase; published checksums are often uppercase.
        if computed_md5 == expected_md5.strip().lower():
            print("   ✓ Checksum verified")
            return True

        print("   ✗ Checksum mismatch!")
        print(f"     Expected: {expected_md5}")
        print(f"     Got:      {computed_md5}")
        return False

    def download_dataset(self, dataset_name: str) -> bool:
        """
        Download a complete dataset.

        A file whose checksum does not match is deleted again so that the
        next run re-downloads it instead of reusing the corrupt copy.

        Parameters
        ----------
        dataset_name : str
            Name of dataset from DATA_SOURCES

        Returns
        -------
        ok : bool
            True if every file of the dataset is present and passed
            verification; False for an unknown dataset, a failed download
            or a checksum mismatch.
        """
        if dataset_name not in self.DATA_SOURCES:
            print(f"Unknown dataset: {dataset_name}")
            print(f"Available datasets: {list(self.DATA_SOURCES.keys())}")
            return False

        dataset = self.DATA_SOURCES[dataset_name]
        print(f"\nDownloading dataset: {dataset_name}")
        print(f"Description: {dataset['description']}")

        # Normalise both layouts to (url, local filename, expected md5 or None).
        if 'files' not in dataset:
            # Single file dataset
            url = dataset['url']
            targets = [(url, os.path.basename(url), dataset.get('md5'))]
        else:
            # Multi-file dataset
            base_url = dataset['url'].rstrip('/')
            checksums = dataset.get('md5') or {}
            targets = [
                (f"{base_url}/{name}", name, checksums.get(name))
                for name in dataset['files']
            ]

        ok = True
        for url, filename, expected_md5 in targets:
            filepath = self.download_file(url, filename)
            if filepath is None:
                ok = False
                continue

            if expected_md5 is None:
                # No checksum entry for this file: nothing to verify.
                continue

            if not self.verify_checksum(filepath, expected_md5):
                filepath.unlink()
                print(f"   ✗ Removed corrupt file: {filename}")
                ok = False

        return ok

    def download_all(self) -> bool:
        """
        Download all available datasets.

        Returns
        -------
        ok : bool
            True if every dataset in DATA_SOURCES downloaded successfully.
        """
        print("="*60)
        print("Eolisa Space - Data Download")
        print("="*60)

        failed = []
        for dataset_name in self.DATA_SOURCES:
            if not self.download_dataset(dataset_name):
                failed.append(dataset_name)

        print("\n" + "="*60)
        if failed:
            print(f"Download finished with errors in: {failed}")
        else:
            print("Download complete!")
        print(f"Data saved to: {self.output_dir.absolute()}")
        print("="*60)

        return not failed


def create_test_data(output_dir: str = './data'):
    """
    Write three small synthetic files for exercising the pipeline offline.

    The files are a noisy ring image (FITS), a noisy sinusoidal light curve
    (CSV) and a parameter dictionary (JSON). Noise comes from NumPy's global
    random state, so the output differs between runs unless the caller
    seeds it.

    Parameters
    ----------
    output_dir : str
        Directory to save sample data; created if missing.
    """
    import numpy as np
    from astropy.io import fits

    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    print("\nCreating sample data for testing...")

    # ---- 1. EHT-like ring image -------------------------------------------
    print("   Creating sample EHT image...")
    n_pix = 128
    mid = n_pix // 2
    rows, cols = np.ogrid[:n_pix, :n_pix]
    dx = cols - mid
    dy = rows - mid
    radius = np.sqrt(dx**2 + dy**2)
    angle = np.arctan2(dy, dx)

    # Gaussian ring modulated by an m=3 azimuthal mode
    r0 = 25
    sigma = 5
    gaussian_ring = np.exp(-((radius - r0)**2) / (2 * sigma**2))
    modulation = 1 + 0.15 * np.cos(3 * angle)

    pixel_noise = 0.03 * np.random.randn(n_pix, n_pix)
    picture = gaussian_ring * modulation + pixel_noise
    picture = np.maximum(picture, 0)  # no negative flux

    primary = fits.PrimaryHDU(picture)
    header_cards = (
        ('OBJECT', 'Sgr A*'),
        ('TELESCOP', 'EHT'),
        ('FREQ', 230e9),
        ('BUNIT', 'JY/BEAM'),
        ('BMAJ', 20e-6),  # 20 microarcsec
        ('BMIN', 20e-6),
        ('PIXSCALE', 2.0),  # microarcsec/pixel
    )
    for keyword, value in header_cards:
        primary.header[keyword] = value

    image_file = target_dir / 'test_sgra_eht_image.fits'
    primary.writeto(image_file, overwrite=True)
    print(f"   ✓ Created: {image_file}")

    # ---- 2. GRAVITY-like light curve --------------------------------------
    print("   Creating sample GRAVITY light curve...")
    total_minutes = 120
    step = 2.0
    t = np.arange(0, total_minutes, step)

    cycle = 65.0  # minutes
    swing = 5.0
    baseline = 8.0

    signal = baseline + swing * np.sin(2 * np.pi * t / cycle)
    signal = signal + 0.8 * np.random.randn(len(t))
    signal_err = np.full_like(signal, 0.8)

    curve_file = target_dir / 'test_gravity_flare.csv'
    table = np.column_stack([t, signal, signal_err])
    np.savetxt(
        curve_file,
        table,
        delimiter=',',
        header='time_minutes,flux_mJy,flux_err_mJy',
        comments='',
    )
    print(f"   ✓ Created: {curve_file}")

    # ---- 3. GRMHD parameter file ------------------------------------------
    print("   Creating sample GRMHD parameter file...")
    grmhd_settings = dict(
        spin=0.94,
        tilt=25.0,
        inclination=50.0,
        electron_model='thermal',
        magnetic_model='SANE',
    )

    settings_file = target_dir / 'test_grmhd_params.json'
    with open(settings_file, 'w') as handle:
        json.dump(grmhd_settings, handle, indent=2)
    print(f"   ✓ Created: {settings_file}")

    print("\n   Test data creation complete!")
    print(f"   Location: {target_dir.absolute()}")


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description='Download and prepare data for Eolisa Space wormhole analysis'
    )
    parser.add_argument(
        '--output-dir',
        default='./data',
        help='Directory to save downloaded data (default: ./data)'
    )
    parser.add_argument(
        '--sample-only',
        action='store_true',
        help='Create sample data only (for testing without real data)'
    )
    parser.add_argument(
        '--dataset',
        choices=['eht_sgra_2017', 'synthetic_grmhd', 'all'],
        default='all',
        help='Specific dataset to download (default: all)'
    )
    return parser


def main(argv=None) -> None:
    """
    Run the download / sample-data workflow.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments without the program name. ``None`` (the
        default) makes argparse read ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    # argparse stores '--sample-only' as 'sample_only'; the previous
    # 'args.test_only' lookup raised AttributeError on every invocation.
    if args.sample_only:
        create_test_data(args.output_dir)
    else:
        downloader = DataDownloader(args.output_dir)

        if args.dataset == 'all':
            downloader.download_all()
        else:
            downloader.download_dataset(args.dataset)

        # Also create sample data for testing
        print("\n" + "="*60)
        create_test_data(args.output_dir)

    print("\n" + "="*60)
    print("Setup complete!")
    print("="*60)
    print("\nNext steps:")
    print("  1. Review downloaded data in:", args.output_dir)
    print("  2. Run analysis pipeline:")
    print("     python run_full_analysis.py")
    print("  3. See README.md for detailed instructions")


if __name__ == "__main__":
    main()
