import numpy as np
import scipy.signal as sps

from .logs import get_logger_from_arg
from .signal_manipulation import preemphasis


def extract_mel_spectrogram_for_tts(wav_signal, fs, n_fft, step_size, n_mels, mel_fmin, mel_fmax, min_amplitude,
                                    pre_emphasis=True, pre_emph_coeff=0.97, dynamic_range=None, real_amplitude=True,
                                    centered=True, normalize_mel_bins=True, normalize_range=True, logger=None):
    """ Compute the dB mel-spectrogram of an audio signal for TTS training

    Processing chain: optional pre-emphasis -> linear amplitude spectrogram -> mel projection ->
    dB conversion (with optional clipping and range normalization).

    :param wav_signal:          Numpy array of audio samples -- shape = (T, )
    :param fs:                  sampling frequency of the audio signal
    :param n_fft:               filter length (in samples) of the FFT
    :param step_size:           length (in samples) between successive analysis windows
    :param n_mels:              number of mel components in the mel-spectrogram
    :param mel_fmin:            minimum frequency used when converting to mel
    :param mel_fmax:            maximum frequency used when converting to mel
    :param min_amplitude:       mel-spectrogram minimal permitted amplitude value (limits the dynamic range)
    :param pre_emphasis:        perform pre-emphasis on input audio
    :param pre_emph_coeff:      pre-emphasis coefficient
    :param dynamic_range:       mel-spectrogram maximal dynamic range in dB (ignored if min_amplitude is specified)
    :param real_amplitude:      if True, the value of the spectrogram bins will be divided by n_fft to get bin
                                magnitude that reflect the temporal signal amplitude
    :param centered:            if True, the spectrogram extraction window will be centered on the time step.
                                The time sequence has to be padded.
    :param normalize_mel_bins:  normalize energy per bins in the mel-spectrogram
    :param normalize_range:     If True, map the db_dynamic_range to [0,1]
    :param logger:              arg to create logger object

    :return: tuple (mel-spectrogram in dB, audio signal as it was analysed, i.e. after the optional pre-emphasis)
    """
    # the analysed waveform is returned as well, so keep a handle on the (possibly filtered) signal
    audio = preemphasis(wav_signal, preemph=pre_emph_coeff) if pre_emphasis else wav_signal

    # linear amplitude spectrogram -- the phase output is not needed here
    linear_amp = extract_spectrogram(x=audio, n_fft=n_fft, step_size=step_size,
                                     real_amplitude=real_amplitude, centered=centered)[0]

    # project the linear frequency axis onto the mel scale
    mel_amp = linear_to_mel(linear_spectrogram=linear_amp, fs=fs, n_mels=n_mels, mel_fmin=mel_fmin,
                            mel_fmax=mel_fmax, normalize_mel_bins=normalize_mel_bins, logger=logger)

    # resolve the clipping floor only when one of the two range settings is provided
    clip_floor = min_amplitude
    if min_amplitude or dynamic_range:
        clip_floor = get_spectrogram_min_amplitude(real_amplitude=real_amplitude, min_amplitude=min_amplitude,
                                                   dynamic_range=dynamic_range, n_fft=n_fft, logger=logger)

    # dB conversion, with clipping and optional mapping of the range to [0, 1]
    mel_db = amplitude_to_db(spectrogram=mel_amp, min_amplitude=clip_floor,
                             normalize_range=normalize_range, logger=logger)

    return mel_db, audio


def get_spectrogram_min_amplitude(real_amplitude, min_amplitude=None, dynamic_range=None, n_fft=None, logger=None):
    """ Compute the minimum amplitude value a spectrogram bin can reach

    :param real_amplitude:  If True, assume that the values of the spectrogram bins were divided by n_fft to get
                            bin magnitude that reflect the temporal signal amplitude
    :param min_amplitude:   The spectrogram minimal permitted amplitude value (limits the dynamic range)
                            This value is divided by n_fft when real_amplitude is set to True
    :param dynamic_range:   The spectrogram maximal dynamic range in dB (ignored if min_amplitude is specified)
                            This value is increased by 20 * log10(n_fft) when real_amplitude is set to True
    :param n_fft:           Number of samples of the FFT window used to extract spectrogram
                            Only used when real_amplitude is set to True
    :param logger:          arg to create logger object

    :return: the minimum amplitude of spectrogram bins
    :raises AssertionError: if neither "min_amplitude" nor "dynamic_range" is set, or if "real_amplitude" is True
                            and "n_fft" is None
    """
    # create logger object
    logger = get_logger_from_arg(logger)

    # NOTE: validation failures are raised explicitly (instead of with "assert") so that the checks are not
    # stripped under "python -O"; AssertionError is kept for backward compatibility with existing callers
    if min_amplitude and dynamic_range:
        logger.warning(f'Both "min_amplitude" and "dynamic_range" are specified, '
                       f'only "min_amplitude" ({min_amplitude}) will be considered')
    elif not (min_amplitude or dynamic_range):
        message = 'Neither "min_amplitude" nor "dynamic_range" are set'
        logger.error(message)
        raise AssertionError(message)

    if not real_amplitude:
        n_fft = 1  # equivalent to using a FFT window of 1
    elif n_fft is None:
        message = '"real_amplitude" is set to True but "n_fft" has no value'
        logger.error(message)
        raise AssertionError(message)

    if min_amplitude:
        # compute real min amplitude per bin
        return min_amplitude / n_fft

    # only "dynamic_range" is set at this point: compute real dynamic range per bin ...
    dynamic_range = dynamic_range + 20 * np.log10(n_fft)

    # ... and convert it to the real min amplitude per bin
    return 10 ** (-dynamic_range / 20)


def amplitude_to_db(spectrogram, min_amplitude=None, normalize_range=False, logger=None):
    """ Transform amplitude to dB with optional clipping and dynamic range normalization

    :param spectrogram:         Numpy array containing all amplitudes of a spectrogram
    :param min_amplitude:       Clip the spectrogram to the minimal permitted amplitude value
                                Required when normalize_range is True (it defines the dB range)
    :param normalize_range:     If True, map the db_dynamic_range to [0,1]
    :param logger:              arg to create logger object

    :return: spectrogram in dB
    :raises AssertionError: if normalize_range is True but "min_amplitude" has no value
    """
    # create logger object
    logger = get_logger_from_arg(logger)

    # min_amplitude must be given to normalize the dB dynamic range -- checked up-front so that no work is done
    # on invalid arguments. Raised explicitly (instead of with "assert") so the check survives "python -O";
    # AssertionError is kept for backward compatibility with existing callers
    if normalize_range and not min_amplitude:
        message = 'Asked for dynamic range normalization, but "min_amplitude" has no value'
        logger.error(message)
        raise AssertionError(message)

    # make sure amplitude bins are positive
    spectrogram = np.abs(spectrogram)

    if min_amplitude:
        # apply clipping (also keeps log10 away from zero-valued bins)
        spectrogram = np.clip(spectrogram, a_min=min_amplitude, a_max=None)

    # transform to dB
    spectrogram = 20 * np.log10(spectrogram)

    if normalize_range:
        # compute dB dynamic range and map it to [0, 1]
        dynamic_range = -20 * np.log10(min_amplitude)
        spectrogram = (spectrogram + dynamic_range) / dynamic_range

    return spectrogram


def denormalize_range(spectrogram, min_amplitude_used):
    """ Undo the [0, 1] range normalization of a dB spectrogram

    :param spectrogram:         Numpy array containing all amplitudes of a spectrogram in dB (values between 0 and 1)
    :param min_amplitude_used:  Minimal amplitude value that was used to normalize the dB spectrogram dynamic range

    :return: spectrogram in dB with its range de-normalized
    """
    # width (in dB) of the range that was mapped to [0, 1]
    db_span = -20 * np.log10(min_amplitude_used)

    # stretch back to the dB span, then shift it down by the same amount
    return spectrogram * db_span - db_span


def db_to_amplitude(spectrogram):
    """ Convert dB values back to linear amplitude values

    :param spectrogram:    Numpy array containing the dB values of a spectrogram

    :return: spectrogram in amplitude value
    """
    # inverse of "20 * log10(amplitude)"
    exponent = spectrogram / 20
    return 10 ** exponent


def linear_to_mel(linear_spectrogram, fs=None, n_mels=80, mel_fmin=0, mel_fmax=None,
                  normalize_mel_bins=True, logger=None):
    """ Project a linear-frequency spectrogram onto the mel scale

    :param linear_spectrogram:    Numpy array containing all amplitudes of a spectrogram -- shape = (n_fft // 2 + 1, T)
    :param fs:                    Sampling frequency expected by the algorithm
    :param n_mels:                Number of bins in the mel-spectrogram
    :param mel_fmin:              Lowest frequency in the mel-spectrum (Hz)
    :param mel_fmax:              Highest frequency in the mel-spectrum (Hz)
    :param normalize_mel_bins:    normalize energy per bins in the mel-spectrogram
    :param logger:                arg to create logger object

    :return: Numpy array containing the spectrogram in mel frequency space -- shape = (n_mels, T)
    """
    # the frequency axis holds n_fft // 2 + 1 bins: invert that relation to recover the FFT size
    nb_freq_bins = linear_spectrogram.shape[0]
    fft_size = (nb_freq_bins - 1) * 2

    # mel filter bank -- shape = (n_mels, n_fft // 2 + 1)
    filterbank = _get_mel_filterbank_matrix(n_fft=fft_size, fs=fs, n_mels=n_mels, mel_fmin=mel_fmin,
                                            mel_fmax=mel_fmax, normalize_mel_bins=normalize_mel_bins,
                                            logger=logger)

    # each mel bin is a weighted sum of the linear bins
    return np.dot(filterbank, linear_spectrogram)


def mel_to_linear(mel_spectrogram, fs, n_fft, mel_fmin=0, mel_fmax=None, normalize_mel_bins=False, logger=None):
    """ Map a mel-spectrogram back onto a linear frequency axis

    The mapping uses the transposed mel filter bank, after scaling every filter so that its weights sum to one.

    :param mel_spectrogram:         Numpy array of the input mel spectrogram -- shape = (n_mels, T)
    :param fs:                      sampling frequency
    :param n_fft:                   number of samples used in the original FFT
    :param mel_fmin:                minimum frequency used when converting to mel
    :param mel_fmax:                maximum frequency used when converting to mel
    :param normalize_mel_bins:      normalize energy per bins in the mel-spectrogram
    :param logger:                  arg to create logger object

    :return: Numpy array containing the spectrogram in linear frequency space -- shape = (n_fft // 2 + 1, T)
    """
    # one filter per mel component
    nb_filters = mel_spectrogram.shape[0]

    # mel filter bank -- shape = (n_mels, n_fft // 2 + 1)
    filterbank = _get_mel_filterbank_matrix(n_fft=n_fft, fs=fs, n_mels=nb_filters, mel_fmin=mel_fmin,
                                            mel_fmax=mel_fmax, normalize_mel_bins=normalize_mel_bins,
                                            logger=logger)

    # scale each filter (row) by the sum of its weights
    row_sums = filterbank.sum(axis=1).reshape(nb_filters, 1)
    filterbank = filterbank / row_sums

    # (T, n_mels) x (n_mels, n_fft // 2 + 1), then back to a frequency-major layout
    time_major = np.dot(mel_spectrogram.T, filterbank)

    return time_major.T


def extract_spectrogram(x, n_fft, step_size, real_amplitude=True, centered=True):
    """ Extract the FFT spectrogram from a series of samples

    Frames are Hann-windowed slices of n_fft samples taken every step_size samples. Inputs shorter than n_fft
    are zero-padded on the right up to n_fft before any other processing.

    The number of frames L is computed on the (padded) signal length N as floor((N - n_fft) / step_size),
    plus one when N is not a multiple of step_size.

    :param x:                   Numpy array of input samples -- shape = (T, )
    :param n_fft:               number of point in the FFT window
    :param step_size:           number of samples skipped at each extraction
    :param real_amplitude:      if True the value of the bins will be divided by n_fft to get bin magnitude that
                                reflect the temporal signal amplitude
    :param centered:            if True, the extraction window will be centered on the time step.
                                The time sequence is reflect-padded on both sides to do so.

    :return: Numpy arrays of amplitude and phase of the spectrogram -- shapes = (n_fft // 2 + 1, L)
    """
    # create the (symmetric) sampling window
    # NOTE: window functions must be taken from scipy.signal.windows -- the "scipy.signal.hann" alias is not
    # available anymore in newer SciPy releases
    window = sps.windows.hann(n_fft)

    # make sure the input signal holds at least one full analysis window
    if len(x) < n_fft:
        x = np.pad(x, (0, len(window) - len(x)), 'constant', constant_values=(0, 0))

    # pad before and after to center the window on the extracted values
    if centered:
        padding_left, padding_right = _get_padding_for_centered_spectrogram(n_fft=n_fft)
        x = np.pad(x, (padding_left, padding_right), mode='reflect')

    # count the number of frames (one frame less when the signal length is a multiple of the step size)
    time_axis = int(np.floor((len(x) - n_fft) / step_size))
    if len(x) % step_size != 0:
        time_axis += 1

    # create containers for spectrogram
    amp = np.zeros((n_fft // 2 + 1, time_axis))
    phase = np.zeros((n_fft // 2 + 1, time_axis))

    for i in range(time_axis):
        # get slice of data and apply windowing
        start = i * step_size
        win_data = np.multiply(x[start: start + n_fft], window)

        # get FFT (one-sided, the input is real)
        freq = np.fft.rfft(win_data)

        # save magnitude and phase individually
        amp[:, i] = np.absolute(freq)
        phase[:, i] = np.angle(freq)

    # scale amplitude bins if necessary
    if real_amplitude:
        amp = amp / n_fft

    return amp, phase


def get_nb_spectrogram_samples(wav_length, n_fft, step_size, centered=True):
    """ Return the number of spectrogram time frames given a WAV segment

    The count is computed arithmetically, following the same padding and frame-counting rules as
    extract_spectrogram(), instead of extracting a spectrogram from a dummy signal.

    :param wav_length:      number of samples in the WAV segment
    :param n_fft:           filter length (in samples) of the FFT
    :param step_size:       length (in samples) between successive analysis windows
    :param centered:        if True, assume that the FFT extraction window is centered on the time step

    :return: the number of spectrogram time frames
    :raises ValueError: if wav_length is negative
    """
    if wav_length < 0:
        raise ValueError(f'"wav_length" must be non-negative, got {wav_length}')

    # segments shorter than one analysis window are zero-padded up to n_fft samples
    padded_length = max(wav_length, n_fft)

    # the segment is padded on the left and right to center the window on the extracted values
    if centered:
        padding_left, padding_right = _get_padding_for_centered_spectrogram(n_fft=n_fft)
        padded_length += padding_left + padding_right

    # same rule as extract_spectrogram(): one frame less when the length is a multiple of the step size
    nb_frames = (padded_length - n_fft) // step_size
    if padded_length % step_size != 0:
        nb_frames += 1

    return int(nb_frames)


def get_nb_wav_samples(spectrogram_length, n_fft, step_size, centered=True):
    """ Compute how many WAV samples a spectrogram segment spans

    :param spectrogram_length:      number of time frames in the spectrogram segment
    :param n_fft:                   filter length (in samples) of the FFT
    :param step_size:               length (in samples) between successive analysis windows
    :param centered:                if True, assume that the FFT extraction window is centered on the time step

    :return: the number of WAV samples
    """
    # a centered extraction pads the audio on both sides; that padding is not part of the WAV segment
    left_pad, right_pad = _get_padding_for_centered_spectrogram(n_fft=n_fft) if centered else (0, 0)

    # span covered by the frames: one full window plus one step per additional frame
    covered_samples = (spectrogram_length - 1) * step_size + n_fft

    return covered_samples - left_pad - right_pad


def reconstruct_signal_griffin_lim(magnitude_spectrogram, step_size, iterations=30, logger=None):
    """ Reconstruct an audio signal from a magnitude spectrogram

        Given a magnitude spectrogram as input, reconstruct the audio signal and return it using
        the Griffin-Lim algorithm
        From the paper: "Signal estimation from modified short-time fourier transform" by Griffin and Lim, in IEEE
                        transactions on Acoustics, Speech, and Signal Processing. Vol ASSP-32, No. 2, April 1984.

    :param magnitude_spectrogram:   Numpy array magnitude spectrogram -- shape = (n_fft // 2 + 1, T)
                                    The rows correspond to frequency bins and the columns correspond to time slices
    :param step_size:               length (in samples) between successive analysis windows
    :param iterations:              Number of iterations for the Griffin-Lim algorithm (must be positive)
                                    Typically a few hundred is sufficient
    :param logger:                  arg to create logger object

    :return: the reconstructed time domain signal as a 1-dim Numpy array -- shape = (T * step_size + n_fft, )
             and the complex spectrogram that was used to produce the signal -- shape = (T, n_fft // 2 + 1)
    :raises ValueError: if "iterations" is not positive (no spectrogram would be produced)
    """
    # create logger object
    logger = get_logger_from_arg(logger)

    # at least one iteration is needed to build the spectrogram returned with the signal
    if iterations <= 0:
        raise ValueError(f'"iterations" must be positive, got {iterations}')

    # shape = (T, n_fft // 2 + 1)
    magnitude_spectrogram = np.transpose(magnitude_spectrogram)

    # find the number of samples used in the FFT window and extract the time steps
    n_fft = (magnitude_spectrogram.shape[1] - 1) * 2
    time_slices = magnitude_spectrogram.shape[0]

    # compute the number of samples needed
    len_samples = int(time_slices * step_size + n_fft)

    # start offset of every analysis frame -- identical for all iterations (one offset per time slice)
    frame_starts = range(0, len_samples - n_fft, step_size)

    # initialize the reconstructed signal to noise
    x_reconstruct = np.random.randn(len_samples)
    window = np.hanning(n_fft)

    done = 0  # number of Griffin-Lim iterations performed so far
    while done < iterations:
        done += 1

        # STFT of the current signal estimate
        reconstruction_spectrogram = np.array([np.fft.rfft(window * x_reconstruct[start: start + n_fft])
                                               for start in frame_starts])

        # Discard magnitude part of the reconstruction and use the supplied magnitude spectrogram instead
        proposal_spectrogram = magnitude_spectrogram * np.exp(1.0j * np.angle(reconstruction_spectrogram))

        # store previous reconstructed signal and create a new one by iFFT + overlap-add
        prev_x = x_reconstruct
        x_reconstruct = np.zeros(len_samples)

        for i, start in enumerate(frame_starts):
            x_reconstruct[start: start + n_fft] += window * np.real(np.fft.irfft(proposal_spectrogram[i]))

        # normalise signal due to overlap add
        x_reconstruct = x_reconstruct / (n_fft / step_size / 2)

        # compute diff between two signals and report progress
        # (np.mean runs in native code; the builtin sum() iterates over every sample in Python)
        diff = np.sqrt(np.mean((x_reconstruct - prev_x) ** 2))
        logger.debug(f'Reconstruction iteration: {done}/{iterations} -- RMSE: {diff * 1e6:.3f}e-6')

    return x_reconstruct, proposal_spectrogram


def _get_padding_for_centered_spectrogram(n_fft):
    """ Compute the left/right padding needed to extract a centered FFT from a series of samples

    :param n_fft:       filter length (in samples) of the FFT

    :return: padding values for left and right sides
    """
    # half a window on each side
    half_window = int(n_fft // 2)

    return half_window, half_window


def _get_mel_filterbank_matrix(n_fft=None, fs=None, n_mels=80, mel_fmin=0, mel_fmax=None,
                               normalize_mel_bins=False, logger=None):
    """ Create a Filterbank matrix to combine FFT bins into Mel-frequency bins

    Each filter is a triangle whose corners are three consecutive points of a grid of n_mels + 2 frequencies,
    uniformly spaced on the mel scale between mel_fmin and mel_fmax.

    :param n_fft:               number of FFT components
    :param fs:                  sampling rate of the incoming signal
    :param n_mels:              number of Mel bands to generate
    :param mel_fmin:            lowest frequency (in Hz)
    :param mel_fmax:            highest frequency (in Hz). If None, mel_fmax = fs / 2.0
    :param normalize_mel_bins:  normalize energy per bins
    :param logger:              arg to create logger object (only used when a warning must be reported)

    :return: np.ndarray [shape=(n_mels, 1 + n_fft // 2)] -- Mel transform matrix
    """
    # set mel_fmax
    if mel_fmax is None:
        mel_fmax = float(fs) / 2

    # Get the center frequencies of each FFT bin
    fft_freqs = np.linspace(0, float(fs) / 2, int(1 + n_fft // 2), endpoint=True)

    # 'Center freqs' of mel bands - uniformly spaced between limits
    min_mel = _hz_to_mel(mel_fmin)
    max_mel = _hz_to_mel(mel_fmax)

    mels = np.linspace(min_mel, max_mel, n_mels + 2)
    mel_f = _mel_to_hz(mels)

    # width of each band edge interval and signed distance between every band edge and every FFT bin
    fdiff = np.diff(mel_f)
    ramps = np.subtract.outer(mel_f, fft_freqs)

    # lower and upper slopes for all bins, computed for all filters at once:
    # filter i rises from mel_f[i] to mel_f[i + 1] and falls from mel_f[i + 1] to mel_f[i + 2]
    lower = -ramps[:-2] / fdiff[:-1, np.newaxis]
    upper = ramps[2:] / fdiff[1:, np.newaxis]

    # then intersect them with each other and zero
    weights = np.maximum(0, np.minimum(lower, upper))

    if normalize_mel_bins:  # Normalize energy per bins
        # Slaney-style mel is scaled to be approx constant energy per channel
        enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels])
        weights *= enorm[:, np.newaxis]

    # A filter whose lower edge is above 0 Hz and whose weights are all zero is an empty channel
    if not np.all((mel_f[:-2] == 0) | (weights.max(axis=1) > 0)):
        # create logger object (only if needed)
        logger = get_logger_from_arg(logger)
        logger.warning('Empty filters detected in mel frequency basis. Some channels will produce empty responses. '
                       'Try increasing your sampling rate (and fmax) or reducing n_mels.')

    return weights


def _hz_to_mel(frequencies):
    """ Map frequencies in Hz onto the mel scale (linear below 1000 Hz, logarithmic from 1000 Hz upwards)

    :param frequencies:     number or np.ndarray [shape=(n,)] -- scalar or array of frequencies

    :return: number or np.ndarray [shape=(n,)] -- input frequencies in Mels
    """
    hz = np.asanyarray(frequencies)

    # constants of the scale
    hz_origin = 0.0
    hz_per_mel = 200.0 / 3                          # slope of the linear region
    knee_hz = 1000.0                                # beginning of log region (Hz)
    knee_mel = (knee_hz - hz_origin) / hz_per_mel   # same (Mels)
    log_step = np.log(6.4) / 27.0                   # step size for log region

    # linear mapping, kept as is for everything below the knee
    mels = (hz - hz_origin) / hz_per_mel

    if not hz.ndim:
        # scalar data: check directly
        if hz >= knee_hz:
            mels = knee_mel + np.log(hz / knee_hz) / log_step
        return mels

    # array data: overwrite the entries that belong to the log region
    in_log_region = (hz >= knee_hz)
    mels[in_log_region] = knee_mel + np.log(hz[in_log_region] / knee_hz) / log_step

    return mels


def _mel_to_hz(mels):
    """ Map mel values back to frequencies in Hz (linear below the 1000 Hz knee, exponential from there upwards)

    :param mels:    number or np.ndarray [shape=(n,)] -- scalar or array of mel bins to convert

    :return: number or np.ndarray [shape=(n,)] -- input mels in Hz
    """
    mel_values = np.asanyarray(mels)

    # constants of the scale
    hz_origin = 0.0
    hz_per_mel = 200.0 / 3                          # slope of the linear region
    knee_hz = 1000.0                                # beginning of log region (Hz)
    knee_mel = (knee_hz - hz_origin) / hz_per_mel   # same (Mels)
    log_step = np.log(6.4) / 27.0                   # step size for log region

    # linear mapping, kept as is for everything below the knee
    freqs = hz_origin + hz_per_mel * mel_values

    if not mel_values.ndim:
        # scalar data: check directly
        if mel_values >= knee_mel:
            freqs = knee_hz * np.exp(log_step * (mel_values - knee_mel))
        return freqs

    # array data: overwrite the entries that belong to the log region
    in_log_region = (mel_values >= knee_mel)
    freqs[in_log_region] = knee_hz * np.exp(log_step * (mel_values[in_log_region] - knee_mel))

    return freqs


def pre_emphasis_on_mel(mel_spec, preemph, fs, n_mels, mel_fmin=0, mel_fmax=None, min_amplitude=None,
                        normalized_range=True, logger=''):
    """ Apply a pre-emphasis filter directly on a dB mel-spectrogram

    The dB frequency response of the FIR filter [1, -preemph] is evaluated at the center frequency of every
    mel band and added to the corresponding row of the mel-spectrogram.

    :param mel_spec:            Numpy array of the mel-spectrogram in dB -- shape = (n_mels, T)
    :param preemph:             pre-emphasis coefficient
    :param fs:                  sampling frequency
    :param n_mels:              number of mel components in the mel-spectrogram
    :param mel_fmin:            minimum frequency used when converting to mel
    :param mel_fmax:            maximum frequency used when converting to mel. If None, mel_fmax = fs / 2.0
    :param min_amplitude:       minimal amplitude value that was used to normalize the dB dynamic range.
                                Must be provided when normalized_range is True, since it defines the dB range
    :param normalized_range:    If True, the filter response is divided by the dB dynamic range before being added
    :param logger:              arg to create logger object

    :return: Numpy array of the filtered mel-spectrogram -- same shape as mel_spec
    """
    logger = get_logger_from_arg(logger)

    # set mel_fmax
    if mel_fmax is None:
        mel_fmax = float(fs) / 2

    # get the center frequency of all bins in the mel spectrum:
    # 'Center freqs' of mel bands - uniformly spaced between limits
    min_mel = _hz_to_mel(mel_fmin)
    max_mel = _hz_to_mel(mel_fmax)

    mels = np.linspace(min_mel, max_mel, n_mels + 2)
    bin_freqs = _mel_to_hz(mels)

    # get the frequency response of the filter at the band centers (the two outer edges are dropped)
    a = [1]
    b = [1, -preemph]
    w, h = sps.freqz(b=b, a=a, worN=bin_freqs[1:-1], fs=fs)

    # get the filter response in dB and repeat it for every time frame -- shape = (n_mels, T)
    h = 20 * np.log10(np.abs(h))
    h = np.tile(np.expand_dims(h, axis=1), (1, mel_spec.shape[1]))

    # if range was normalized
    if normalized_range:
        dbr = -20 * np.log10(min_amplitude)
        # normalize filter
        h = h / dbr

    # HACK: empirical correction with magic numbers, only available for this exact configuration
    if min_amplitude == 1e-5 and preemph == 0.97:
        correction_matrix = np.log(w) / 30 - 0.277
        correction_matrix = np.tile(np.expand_dims(correction_matrix, axis=1), (1, h.shape[1]))
        h = h - correction_matrix
    else:
        # "warning" is used instead of the deprecated "warn" alias, as in the rest of this module
        logger.warning("You should probably compute a correction matrix for this config to compensate for the cliping.")

    return np.add(mel_spec, h)