import os
import librosa
import numpy as np
from tabulate import tabulate
import soundfile as sf
import scipy.ndimage
import itertools
from tqdm import tqdm
import torch
import torchaudio

class AudioProcessor:

    def __init__(self, audio_file):
        """
        Create a processor for ``audio_file`` and record its initial details.

        The file name (without extension) and the extension are derived from
        the path; duration and sample rate are queried through librosa. A
        details table describing the file is stored as the first entry of
        ``self.changes``.
        """
        base_name = os.path.basename(audio_file)
        stem, extension = os.path.splitext(base_name)

        self.path = audio_file
        self.name = stem
        self.format = extension
        self.duration = librosa.get_duration(path=audio_file)
        self.sample_rate = librosa.get_samplerate(audio_file)
        self.changes = []
        self.optimized_params = None
        self.load_details()

    # File information methods
    def load_details(self):
        """
        Build a table of the current file attributes and record it.

        The rendered table is appended to ``self.changes`` and also returned.
        """
        labels = ("File Name", "File Format", "Duration", "Sample Rate")
        values = (
            self.name,
            self.format,
            f"{self.duration} seconds",
            f"{self.sample_rate} Hz",
        )
        rows = [[label, value] for label, value in zip(labels, values)]

        rendered = tabulate(rows, headers=["Attribute", "Value"], tablefmt="outline")
        self.changes.append(rendered)
        return rendered
    
    def display_details(self):
        """Display the details of the audio file."""
        print(self.changes[-1])

    def display_changes(self):
        """Display the changes made to the audio file side by side."""
        self._clean_duplicates_changes()
        if len(self.changes) == 1:
            self.display_details()
        else:
            table1 = self.changes[0].split('\n')
            table2 = self.changes[-1].split('\n')

            combined_table = []
            for line1, line2 in zip(table1, table2):
                combined_table.append([line1, '===>', line2])

            print(tabulate(combined_table, tablefmt="plain"))

    def _clean_duplicates_changes(self):
        """Collapse runs of identical consecutive entries in ``self.changes``."""
        deduplicated = []
        for entry in self.changes:
            # Keep an entry only when it differs from the one just before it.
            if not deduplicated or entry != deduplicated[-1]:
                deduplicated.append(entry)
        self.changes = deduplicated

    # Audio processing methods
    def load_as_array(self, sample_rate: int = 16000) -> np.ndarray:
        """
        Load an audio file and convert it into a NumPy array.

        Parameters
        ----------
        sample_rate : int, optional
            The sample rate to which the audio will be resampled (default is 16000 Hz).

        Returns
        -------
        np.ndarray
            A NumPy array containing the audio data.
        """
        try:
            audio, sr = librosa.load(self.path, sr=sample_rate)
            self.sample_rate = sr
            return audio
        except Exception as e:
            raise RuntimeError(f"Failed to load audio file: {e}")
        
    def resample_wav(self) -> str:
        output_path = os.path.join('resampled_files', f'{self.name}.wav')
        try:
            audio, sr = librosa.load(self.path)
            resampled_audio = librosa.resample(y=audio, orig_sr=sr, target_sr=16000)
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            sf.write(output_path, resampled_audio, 16000)
            self._update_file_info(output_path)
            return output_path
        except Exception as e:
            raise RuntimeError(f"Failed to resample audio file: {e}")
        
    def convert_to_wav(self):
        """
        Converts an audio file to WAV format.

        Returns
        -------
        str
            The path to the converted audio file.
        """        
        output_path = os.path.join('converted_files', f'{self.name}.wav')
        try:
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            audio, sr = librosa.load(self.path, sr=16000)
            sf.write(output_path, audio, 16000)
            self._update_file_info(output_path)
            return output_path
        except Exception as e:
            raise RuntimeError(f"Failed to convert audio file to WAV: {e}")

    def enhance_audio(self, noise_reduce_strength=0.5, voice_enhance_strength=1.5, volume_boost=1.2):
        """
        Enhance audio quality by reducing noise and clarifying voices.
        """
        try:
            y, sr = librosa.load(self.path, sr=16000)
            y_enhanced = self._enhance_audio_sample(y, noise_reduce_strength, voice_enhance_strength, volume_boost)

            output_path = os.path.join('enhanced_files', f'{self.name}_enhanced.wav')
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            sf.write(output_path, y_enhanced, sr)

            self._update_file_info(output_path)
            return output_path
        except Exception as e:
            raise RuntimeError(f"Failed to enhance audio: {e}")

    def _compute_spectral_contrast(self, y, sr, n_bands=6, fmin=200.0, quantile=0.02, hop_length=512):
        """
        Return the mean spectral contrast of a signal.

        The magnitude STFT of ``y`` is passed to
        ``librosa.feature.spectral_contrast`` and the resulting values are
        averaged into a single number. Higher contrast generally indicates
        clearer speech separation from background.
        """
        magnitude = np.abs(librosa.stft(y, hop_length=hop_length))
        contrast_options = {
            "S": magnitude,
            "sr": sr,
            "n_bands": n_bands,
            "fmin": fmin,
            "quantile": quantile,
            "hop_length": hop_length,
        }
        band_contrast = librosa.feature.spectral_contrast(**contrast_options)
        return np.mean(band_contrast)

    def optimize_enhancement_parameters(self, step=0.25, max_iterations=50, sample_duration=30):
        """
        Find optimal parameters for audio enhancement using grid search on a sample.

        The first ``sample_duration`` seconds of the file are enhanced with
        each candidate parameter combination. Each result is scored as
        ``0.3 * correlation + 0.7 * contrast_improvement``, where the
        correlation is between the original and enhanced samples and the
        contrast improvement is the change in mean spectral contrast.

        Parameters
        ----------
        step : float, optional
            Grid spacing used for all three parameter ranges (default: 0.25).
        max_iterations : int, optional
            Maximum number of parameter combinations to evaluate (default: 50).
        sample_duration : float, optional
            Number of seconds of audio to load for the search (default: 30).

        Returns
        -------
        tuple or None
            The best ``(noise_reduce_strength, voice_enhance_strength,
            volume_boost)`` combination found, also stored in
            ``self.optimized_params``. ``None`` if no combination produced a
            score greater than negative infinity (e.g. an empty grid).

        Notes
        -----
        Combinations are visited in ``itertools.product`` order, where the
        last range varies fastest. If ``max_iterations`` is smaller than the
        grid, the later values of ``noise_reduce_strength`` are never tried.
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load at 16 kHz, the rate enhance_audio() processes at, so the
        # parameters are tuned on the same time-frequency resolution they
        # will later be applied to.
        y_orig, sr = librosa.load(self.path, sr=16000, duration=sample_duration)
        y_orig_tensor = torch.tensor(y_orig, device=device)

        param_ranges = [
            np.arange(0.25, 1.5, step),  # noise_reduce_strength
            np.arange(1.0, 3.0, step),   # voice_enhance_strength
            np.arange(1.0, 2.0, step)    # volume_boost
        ]

        best_score = float('-inf')
        best_params = None

        # The grid size is the product of the axis lengths; there is no need
        # to materialise every combination just to count them.
        grid_size = int(np.prod([len(axis) for axis in param_ranges]))
        total_iterations = min(max_iterations, grid_size)

        # The reference contrast does not depend on the candidate parameters.
        contrast_orig = self._compute_spectral_contrast(y_orig, sr)

        for params in tqdm(itertools.islice(itertools.product(*param_ranges), max_iterations),
                           total=total_iterations,
                           desc="Searching for optimal parameters"):
            y_enhanced = self._enhance_audio_sample(y_orig, *params)
            y_enhanced_tensor = torch.tensor(y_enhanced, device=device)

            # Correlation between original and enhanced audio
            min_length = min(len(y_orig_tensor), len(y_enhanced_tensor))
            y_orig_trimmed = y_orig_tensor[:min_length]
            y_enhanced_trimmed = y_enhanced_tensor[:min_length]
            correlation = torch.corrcoef(torch.stack([y_orig_trimmed, y_enhanced_trimmed]))[0, 1].item()

            # Spectral contrast improvement
            contrast_enhanced = self._compute_spectral_contrast(y_enhanced, sr)
            contrast_improvement = contrast_enhanced - contrast_orig

            score = (0.3 * correlation) + (0.7 * contrast_improvement)

            if score > best_score:
                best_score = score
                best_params = params

        self.optimized_params = best_params
        return best_params

    def _enhance_audio_sample(self, y, noise_reduce_strength=0.5, voice_enhance_strength=1.5, volume_boost=1.2):
        """
        Enhance an audio sample by reducing noise and enhancing voice clarity.

        Processing steps:

        1. Compute the STFT and estimate a noise floor with a median filter
           along the time axis of the magnitude spectrogram.
        2. Build a soft mask from the relative excess over that floor and
           apply it to the magnitudes, keeping the original phase.
        3. Invert the STFT, split the result into harmonic and percussive
           parts, scale the harmonic part and apply the volume boost.
        4. Peak-normalize the result when its peak reaches 1.0.

        Parameters
        ----------
        y : np.ndarray
            Input audio signal
        noise_reduce_strength : float
            Strength of noise reduction (default: 0.5)
        voice_enhance_strength : float
            Strength of voice enhancement (default: 1.5)
        volume_boost : float
            Volume boost factor (default: 1.2)

        Returns
        -------
        np.ndarray
            Enhanced audio signal, with the same number of samples as ``y``.
        """
        # STFT
        S = librosa.stft(y, n_fft=2048)
        S_mag, S_phase = np.abs(S), np.angle(S)
        # Median over 31 neighbouring frames of each frequency bin.
        S_filtered = scipy.ndimage.median_filter(S_mag, size=(1, 31))

        # Noise reduction mask; the epsilon avoids division by zero in silent bins.
        mask = np.clip((S_mag - S_filtered) / (S_mag + 1e-10), 0, 1) ** noise_reduce_strength
        S_denoised = S_mag * mask * np.exp(1j * S_phase)

        # Inverse STFT. Passing ``length`` pads or trims the output to the
        # input length; without it the result can come back slightly shorter.
        y_denoised = librosa.istft(S_denoised, length=len(y))

        # Harmonic-percussive separation and enhancement
        y_harmonic, y_percussive = librosa.effects.hpss(y_denoised)
        y_enhanced = (y_harmonic * voice_enhance_strength + y_percussive) * volume_boost

        # threshold=1.0: only rescale when the peak is at least 1.0;
        # quieter signals keep the level produced above.
        return librosa.util.normalize(y_enhanced, norm=np.inf, threshold=1.0)

    # Helper method
    def _update_file_info(self, new_path):
        """
        Point the processor at ``new_path`` and refresh its details.

        Path, sample rate, extension and duration are re-read for the new
        file, and a fresh details table is appended to ``self.changes``.
        ``self.name`` is left unchanged.
        """
        self.path = new_path
        self.sample_rate = librosa.get_samplerate(new_path)
        _, extension = os.path.splitext(new_path)
        self.format = extension
        self.duration = librosa.get_duration(path=new_path)
        self.load_details()