import streamlit as st
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from datasets import load_dataset
from huggingface_hub import HfApi
import os
import matplotlib.pyplot as plt

# Hugging Face Hub credentials
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_REPO_ID = "louiecerv/amer_sign_lang_data_augmentation"
DATASET_REPO_ID = "louiecerv/american_sign_language"

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
st.write(f"Device: {device}")

# Model hyperparameters
IMG_HEIGHT = 28
IMG_WIDTH = 28
IMG_CHS = 1
N_CLASSES = 24  # 24 static letters (J and Z require motion)


class MyConvBlock(nn.Module):
    """Conv -> BatchNorm -> ReLU -> Dropout -> MaxPool block."""

    def __init__(self, in_ch, out_ch, dropout_p):
        super().__init__()
        kernel_size = 3
        self.model = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size, stride=1, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.Dropout(dropout_p),
            nn.MaxPool2d(2, stride=2),
        )

    def forward(self, x):
        return self.model(x)


# Each MaxPool2d halves the spatial size (with floor): 28 -> 14 -> 7 -> 3
flattened_img_size = 75 * 3 * 3

# Input: 1 x 28 x 28
base_model = nn.Sequential(
    MyConvBlock(IMG_CHS, 25, 0),   # 25 x 14 x 14
    MyConvBlock(25, 50, 0.2),      # 50 x 7 x 7
    MyConvBlock(50, 75, 0),        # 75 x 3 x 3
    nn.Flatten(),
    nn.Linear(flattened_img_size, 512),
    nn.Dropout(0.3),
    nn.ReLU(),
    nn.Linear(512, N_CLASSES),
)


# Streamlit app
def main():
    st.title("American Sign Language Recognition")

    # Training controls live in the sidebar
    num_epochs = st.sidebar.slider("Number of Epochs", 1, 20, 5)
    train_button = st.sidebar.button("Train Model")

    # Load the dataset from the Hugging Face Hub
    dataset = load_dataset(DATASET_REPO_ID)

    # Preprocessing and data augmentation applied per sample
    random_transforms = transforms.Compose([
        transforms.RandomRotation(5),
        transforms.RandomResizedCrop((IMG_HEIGHT, IMG_WIDTH), scale=(0.9, 1), ratio=(1, 1)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.5),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ])

    def collate_fn(batch):
        images = []
        labels = []
        for item in batch:
            if 'pixel_values' in item and 'label' in item:
                # pixel_values is expected to hold a 1 x 28 x 28 float image
                image = torch.tensor(item['pixel_values'])
                label = item['label']
                try:
                    image = random_transforms(image)
                    images.append(image)
                    labels.append(label)
                except Exception as e:
                    print(f"Error processing image: {e}")
                    continue
        if not images:
            return torch.tensor([]), torch.tensor([])
        images = torch.stack(images).to(device)
        labels = torch.tensor(labels).long().to(device)
        return images, labels

    train_loader = DataLoader(dataset["train"], batch_size=64, shuffle=True, collate_fn=collate_fn)
    val_loader = DataLoader(dataset["validation"], batch_size=64, collate_fn=collate_fn)

    # Model, loss, and optimizer
    model = base_model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    loss_history = []
    accuracy_history = []

    if train_button:
        model.train()  # enable Dropout and BatchNorm updates during training
        for epoch in range(num_epochs):
            total = 0
            correct = 0
            epoch_loss = 0
            for i, (images, labels) in enumerate(train_loader):
                # Skip batches where every sample failed preprocessing
                if images.nelement() == 0:
                    continue

                # Forward pass
                outputs = model(images)
                loss = criterion(outputs, labels)
                epoch_loss += loss.item()

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            epoch_accuracy = 100 * correct / total
            loss_history.append(epoch_loss / len(train_loader))
            accuracy_history.append(epoch_accuracy)
            st.write(f'Epoch [{epoch + 1}/{num_epochs}], '
                     f'Loss: {epoch_loss / len(train_loader):.4f}, '
                     f'Accuracy: {epoch_accuracy:.2f}%')
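        # Optional validation pass -- an illustrative sketch, not part of the
        # original training flow. val_loader is built above but otherwise
        # unused; assuming batches come through the same collate_fn (tensors
        # already on `device`, and note that the augmenting transforms are
        # then applied to validation images too), held-out accuracy could be
        # reported like this:
        model.eval()  # disable Dropout; use running BatchNorm statistics
        val_correct, val_total = 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                if images.nelement() == 0:
                    continue
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
        if val_total > 0:
            st.write(f"Validation Accuracy: {100 * val_correct / val_total:.2f}%")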
        # Plot loss and accuracy on a shared x-axis (epochs)
        fig, ax1 = plt.subplots()
        ax2 = ax1.twinx()
        ax1.plot(loss_history, 'g-', label='Loss')
        ax2.plot(accuracy_history, 'b-', label='Accuracy')
        ax1.set_xlabel('Epoch')
        ax1.set_ylabel('Loss', color='g')
        ax2.set_ylabel('Accuracy (%)', color='b')
        plt.title('Training Loss and Accuracy')
        st.pyplot(fig)


if __name__ == "__main__":
    main()
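
# --- Sketch: publishing the trained weights ---
# HF_TOKEN and MODEL_REPO_ID are defined above but never consumed. One
# plausible way to wire them up (an assumption, not part of the original
# flow) is to save the state dict and push it with huggingface_hub's HfApi;
# the helper name and the "model.pth" file name are hypothetical choices.
def push_model_to_hub(model, repo_id=MODEL_REPO_ID, token=HF_TOKEN):
    path = "model.pth"
    torch.save(model.state_dict(), path)  # serialize weights locally
    api = HfApi()
    api.upload_file(
        path_or_fileobj=path,
        path_in_repo=path,
        repo_id=repo_id,
        token=token,
    )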