import os
import struct
import pickle

import numpy as np
import torch
import lightgbm as lgb
import torchaudio
from sklearn.exceptions import NotFittedError
from torchaudio.transforms import Spectrogram
import torch.nn.functional as F
from datasets.formatting import query_table
import warnings

warnings.filterwarnings("ignore")

SR = 12000


class FastModel:
    """
    A class designed for training and predicting using LightGBM, incorporating spectral and cepstral features.

    ### Workflow:
    1. Batch Loading and Decoding:
    Load audio data in batches directly from a table and decode byte-encoded information.

    2. Processing Audio:
        - Resampling, Padding, or Truncating:
    Adjust audio durations by padding, cutting, or resampling as needed.
        - Spectral and Cepstral Feature Extraction:
            - Compute the spectrogram for audio signals.
            - Focus on a selected frequency range (~50-1500 Hz) to derive the cepstrum, calculated as the FFT of the logarithm of the spectrogram.
            - Average both spectrogram and cepstral features over the time axis and combine them into a unified feature vector.

    3. Model Application:
    Use the extracted features as input for the LightGBM model to perform predictions.

    ### Options for Energy Optimization:
    - Feature Selection:
    Mask less significant features to reduce computation.
    - Signal Truncation:
    Process only a limited duration (e.g., a few seconds) of the audio signal.
    - Hardware Acceleration:
    Utilize CUDA to speed up feature computation when supported.

    Attributes
    ----------
    feature_params : dict
        Parameters for configuring the MelSpectrogram transformation during training.
    lgbm_params : dict, optional
        Parameters for configuring the LightGBM model.
    model_file : str
        Path for saving or loading the trained LightGBM model.
    padding_method : str
        Padding method to apply when the waveform size is smaller than the desired size.
    waveform_duration : float
        Duration of the audio waveform to process, in seconds.
    mask_features : bool
        Whether to enable feature masking for dimensionality reduction.
    mask_file : str
        Path to save or load the feature mask file.
    mask_ratio : float
        The ratio of features to retain when feature masking is applied.
    batch_size : int
        Number of samples per batch during training and prediction.
    apply_offset_on_fit : bool
        Whether to apply the offset on fit. Useful if waveform_duration is below than 3 seconds.
    device : str
        Device used for computation ("cpu" or "cuda").

    Methods
    -------
    _save_feature_mask(model, n_features, ratio):
        Saves the most important features as a mask.
    _load_feature_mask():
        Loads the feature mask from the saved file.
    fit(dataset):
        Trains the LightGBM model on audio features extracted from the dataset.
    predict(dataset, get_proba=False):
        Predicts labels or probabilities for a dataset using the trained model.
    get_features(audios, spectrogram_transformer, cepstral_transformer):
        Extracts features from raw audio using spectrogram and cepstral transformations.
    """

    def __init__(
        self,
        feature_params,
        lgbm_params=None,
        padding_method="reflect",
        waveform_duration=3,
        model_file=None,
        mask_features=False,
        mask_file="feature_mask.pkl",
        mask_ratio=0.25,
        batch_size=5000,
        apply_offset_on_fit=False,
        device="cpu",
    ):
        self.feature_params = feature_params
        self.lgbm_params = lgbm_params
        self.model_file = model_file
        self.padding_method = padding_method
        self.waveform_duration = waveform_duration
        self.mask_features = mask_features
        self.mask_file = mask_file
        self.mask_ratio = mask_ratio
        self.batch_size = batch_size
        self.apply_offset_on_fit = apply_offset_on_fit
        self.device = torch.device(
            "cuda" if device == "cuda" and torch.cuda.is_available() else "cpu"
        )
        self.spectrogram_transformer = Spectrogram(
            n_fft=self.feature_params["n_fft"],
            hop_length=self.feature_params["hop_length"],
            pad=self.feature_params["pad"],
            window_fn=self.feature_params["win_spectrogram"],
            power=self.feature_params["power"],
            pad_mode=self.feature_params["pad_mode"],
            onesided=True,
            center=False,
        ).to(self.device)
        self.f = torch.fft.rfftfreq(self.feature_params["n_fft"], d=1.0 / SR)
        self.ind_f_filtered = torch.tensor(
            (self.f > self.feature_params["f_min"]) & (self.f < self.feature_params["f_max"]),
            device=self.device,
        )
        self.n_fft_cepstral = self.ind_f_filtered.sum()
        self.cepstral_transformer = Spectrogram(
            n_fft=self.n_fft_cepstral,
            hop_length=self.n_fft_cepstral,
            pad=0,
            window_fn=self.feature_params["win_cepstral"],
            power=self.feature_params["power"],
            pad_mode=self.feature_params["pad_mode"],
            onesided=True,
            center=False,
        ).to(self.device)
        self.cf = torch.fft.rfftfreq(self.n_fft_cepstral, d=0.5)
        self.ind_cf_filtered = torch.tensor(
            (self.cf > self.feature_params["fc_min"]) & (self.cf < self.feature_params["fc_max"]),
            device=self.device,
        )

    def _save_feature_mask(self, model, n_features, ratio):
        feature_importance = model.feature_importance(importance_type="gain")
        sorted_indices = np.argsort(feature_importance)[::-1]
        top_indices = sorted_indices[: max(1, int(n_features * ratio))]
        mask = np.zeros(n_features, dtype=bool)
        mask[top_indices] = True
        with open(self.mask_file, "wb") as f:
            pickle.dump(mask, f)

    def _load_feature_mask(self):
        with open(self.mask_file, "rb") as f:
            return pickle.load(f)

    def fit(self, dataset):
        """
        Trains a LightGBM model on features extracted from the dataset.

        Parameters
        ----------
        dataset : Dataset
            Dataset object containing audio samples and their corresponding labels.

        Raises
        ------
        ValueError
            If the dataset is empty or invalid.
        """
        features, labels = [], []
        offsets = [0, 12000, 24000] if self.apply_offset_on_fit else [0]
        for offset in offsets:
            for audio, label in batch_audio_loader(
                dataset,
                waveform_duration=self.waveform_duration,
                batch_size=self.batch_size,
                padding_method=self.padding_method,
                offset=offset,
            ):
                feature = self.get_features(
                    audio, self.spectrogram_transformer, self.cepstral_transformer
                )
                features.append(feature)
                labels.extend(label)
        x_train = torch.cat(features, dim=0)
        train_data = lgb.Dataset(x_train.cpu(), label=labels)
        model = lgb.train(self.lgbm_params, train_data)

        if self.mask_features:
            self._save_feature_mask(model, x_train.shape[1], self.mask_ratio)
            mask = self._load_feature_mask()
            x_train = x_train[:, mask]
            train_data = lgb.Dataset(x_train.cpu(), label=labels)
            model = lgb.train(self.lgbm_params, train_data)

        model.save_model(self.model_file)

    def predict(self, dataset, get_proba=False):
        """
        Predicts labels or probabilities for a dataset using the trained model.

        Parameters
        ----------
        dataset : Dataset
            The dataset containing audio data for prediction.
        get_proba : bool, optional
            If True, returns class probabilities rather than binary predictions (default is False).

        Returns
        -------
        numpy.ndarray
            If `get_proba` is True, returns a 1D array of class probabilities.
            If `get_proba` is False, returns a 1D array of binary predictions (0 or 1).

        Raises
        ------
        NotFittedError
            If the model is not yet trained.
        FileNotFoundError
            If the model file does not exist.
        """
        if not self.model_file:
            raise NotFittedError("The model is not trained yet. Train using the `fit` method.")
        if not os.path.isfile(self.model_file):
            raise FileNotFoundError(f"Model file {self.model_file} not found.")

        features = []
        for audio, _ in batch_audio_loader(
            dataset,
            waveform_duration=self.waveform_duration,
            batch_size=self.batch_size,
            padding_method=self.padding_method,
        ):
            feature = self.get_features(
                audio, self.spectrogram_transformer, self.cepstral_transformer
            )
            features.append(feature)
        features = torch.cat(features, dim=0)
        torch.cuda.empty_cache()

        if self.mask_features:
            mask = self._load_feature_mask()
            features = features[:, mask]

        model = lgb.Booster(model_file=self.model_file)
        y_score = model.predict(features.cpu())

        return y_score if get_proba else (y_score >= 0.5).astype(int)

    def get_features(self, audios, spectrogram_transformer, cepstral_transformer):
        """
        Extracts features from raw audio using spectrogram and cepstrum transformations.

        Parameters
        ----------
        audios : torch.Tensor
            A batch of audio waveforms as 1D tensors.
        spectrogram_transformer : Spectrogram
            Transformation used to compute MelSpectrogram features.
        cepstral_transformer : Spectrogram
            Transformation used to compute cepstral features.

        Returns
        -------
        torch.Tensor
            Extracted features for the audio batch. Includes both cepstral and log-scaled spectrogram features.

        Raises
        ------
        ValueError
            If the input audio tensor is empty or invalid.
        """
        audios = audios.to(self.device)
        sxx = spectrogram_transformer(audios)  # shape : (n_audios, n_f, n_blocks)
        sxx = torch.log10(torch.clamp(sxx.permute(0, 2, 1), min=1e-10))
        cepstral_mat = cepstral_transformer(sxx[:, :, self.ind_f_filtered]).squeeze(dim=3)[
            :, :, self.ind_cf_filtered
        ]

        return torch.cat(
            [
                cepstral_mat.mean(dim=1),
                sxx.mean(dim=1),
            ],
            dim=1,
        )


def batch_audio_loader(
    dataset,
    waveform_duration=3,
    batch_size=1,
    sr=12000,
    device="cpu",
    padding_method=None,
    offset=0,
):
    """
    Loads and preprocesses audio data from a dataset for training or inference in batches.

    Parameters
    ----------
    dataset : Dataset
        The dataset containing audio samples and labels.
    waveform_duration : float, optional
        Desired duration of the audio waveforms in seconds (default is 3).
    batch_size : int, optional
        Number of audio samples per batch (default is 1).
    sr : int, optional
        Target sampling rate for audio processing (default is 12000).
    device : str, optional
        Device for processing ("cpu" or "cuda") (default is "cpu").
    padding_method : str, optional
        Method to pad audio waveforms smaller than the desired size (e.g., "zero", "reflect").
    offset : int, optional
        Number of samples to skip before processing the first audio sample (default is 0).

    Yields
    ------
    tuple
        A tuple (batch_audios, batch_labels), where:
        - batch_audios is a tensor of processed audio waveforms.
        - batch_labels is a tensor of corresponding audio labels.

    Raises
    ------
    ValueError
        If an unsupported sampling rate is encountered in the dataset.
    """

    def process_resampling(resample_buffer, resample_indices, batch_audios, sr, target_sr):
        if resample_buffer:
            resampler = torchaudio.transforms.Resample(
                orig_freq=sr, new_freq=target_sr, lowpass_filter_width=6
            )
            resampled = resampler(torch.stack(resample_buffer))
            for idx, original_idx in enumerate(resample_indices):
                batch_audios[original_idx] = resampled[idx]

    device = torch.device("cuda" if device == "cuda" and torch.cuda.is_available() else "cpu")
    batch_audios, batch_labels = [], []
    resample_24000, resample_24000_indices = [], []

    for i in range(len(dataset)):
        pa_subtable = query_table(dataset._data, i, indices=dataset._indices)
        wav_bytes = pa_subtable[0][0][0].as_py()
        sampling_rate = struct.unpack("<I", wav_bytes[24:28])[0]

        if sampling_rate not in [sr, sr * 2]:
            raise ValueError(
                f"Unsupported sampling rate: {sampling_rate}Hz. Only {sr}Hz and {sr * 2}Hz are allowed."
            )

        data_size = struct.unpack("<I", wav_bytes[40:44])[0] // 2
        if data_size == 0:
            batch_audios.append(torch.zeros(int(waveform_duration * SR)))
        else:
            try:
                waveform = (
                    torch.frombuffer(wav_bytes[44:], dtype=torch.int16, offset=offset)[
                        : int(waveform_duration * sampling_rate)
                    ].float()
                    / 32767
                )
            except Exception as e:
                continue  # May append during fit for small audios. offset is set to 0 during predict.
            waveform = apply_padding(
                waveform, int(waveform_duration * sampling_rate), padding_method
            )

            if sampling_rate == sr:
                batch_audios.append(waveform)
            elif sampling_rate == 2 * sr:
                resample_24000.append(waveform)
                resample_24000_indices.append(len(batch_audios))
                batch_audios.append(None)

        batch_labels.append(pa_subtable[1][0].as_py())

        if len(batch_audios) == batch_size:
            # Perform resampling once and take advantage of Torch's vectorization capabilities.
            process_resampling(resample_24000, resample_24000_indices, batch_audios, sr * 2, SR)

            batch_audios_on_device = torch.stack(batch_audios).to(device)
            batch_labels_on_device = torch.tensor(batch_labels).to(device)

            yield batch_audios_on_device, batch_labels_on_device

            batch_audios, batch_labels = [], []
            resample_24000, resample_24000_indices = [], []

    if batch_audios:
        process_resampling(resample_24000, resample_24000_indices, batch_audios, sr * 2, SR)
        batch_audios_on_device = torch.stack(batch_audios).to(device)
        batch_labels_on_device = torch.tensor(batch_labels).to(device)

        yield batch_audios_on_device, batch_labels_on_device


def apply_padding(waveform, output_size, padding_method="zero"):
    """
    Applies padding to the waveform when its size is smaller than the desired output size.

    Parameters
    ----------
    waveform : torch.Tensor
        Input 1D waveform tensor.
    output_size : int
        Desired output size after padding or truncation.
    padding_method : str, default="zero"
        Padding method to apply.

    Returns
    -------
    torch.Tensor
        Padded or truncated waveform of size `output_size`.
    """
    if waveform.size(0) >= output_size:
        return waveform[:output_size]

    total_pad = output_size - waveform.size(0)
    if padding_method == "zero":
        return F.pad(waveform, (0, total_pad), mode="constant", value=0)
    if padding_method in ["reflect", "replicate", "circular"]:
        # Pad not possible if waveform.size(0) < total_pad.
        if waveform.size(0) < total_pad:
            num_repeats = (total_pad // waveform.size(0)) + 1
            waveform = torch.tile(waveform, (num_repeats,))
            total_pad = output_size - waveform.size(0)

        return F.pad(waveform.unsqueeze(0), (0, total_pad), mode=padding_method).squeeze()
    raise ValueError(f"Invalid padding method: {padding_method}")