import streamlit as st
import whisper
from moviepy.editor import VideoFileClip
from tempfile import NamedTemporaryFile
import numpy as np
from transformers import BertTokenizer, BertModel
import torch
import os

# Tokenizer and encoder are both built from the same pre-trained BERT checkpoint
_BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(_BERT_CHECKPOINT)
model = BertModel.from_pretrained(_BERT_CHECKPOINT)

# Whisper "base" model, used to transcribe interview audio
whisper_model = whisper.load_model("base")

# Scoring rubric: criterion name -> keywords looked for in a candidate's transcript
criteria = dict(
    technical=[
        "machine learning",
        "data",
        "preprocess",
        "decision tree",
        "SVM",
        "neural network",
        "hyperparameter",
    ],
    problem_solving=[
        "cross-validation",
        "grid search",
        "evaluate",
        "optimize",
        "performance",
    ],
    communication=["I would", "then", "and", "also"],
)

# Function to encode a response using BERT
def encode_response(response):
    """Return a mean-pooled BERT embedding of ``response`` as a NumPy array.

    The text is tokenized (truncated to the tokenizer's maximum length) and
    run through the module-level ``model``. Token vectors from the last hidden
    layer are averaged over the sequence dimension.

    Args:
        response: The text to embed. A single string yields a 1-D vector; a
            list of strings yields one row per string.

    Returns:
        numpy.ndarray holding the pooled embedding(s), with size-1 dimensions
        squeezed away.
    """
    inputs = tokenizer(response, return_tensors='pt', padding=True, truncation=True)

    # Inference only: skip building the autograd graph to save time and memory.
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state

        # Weight every token by its attention mask so padding positions (which
        # appear when several texts of different lengths are batched) do not
        # dilute the average. For a single string the mask is all ones, so this
        # is the plain mean over tokens.
        mask = inputs['attention_mask'].unsqueeze(-1).to(hidden.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1)
        pooled = (hidden * mask).sum(dim=1) / token_counts

    return pooled.squeeze().numpy()

# Function to score the response based on predefined criteria
def score_response(response, criteria):
    """Score ``response`` against keyword lists, one score per criterion.

    Matching is case-insensitive and substring-based: a keyword counts as
    present when it occurs anywhere in the response, so ``"preprocess"``
    matches "preprocessing" and ``"and"`` also matches inside "understand".

    Args:
        response: The candidate's transcript text.
        criteria: Mapping of criterion name to a list of keyword strings.

    Returns:
        dict mapping each criterion name to the fraction (0.0-1.0) of its
        keywords found in the response. A criterion with no keywords scores
        0.0.
    """
    # Lower-case the response once instead of once per keyword.
    text = response.lower()

    scores = {}
    for criterion, keywords in criteria.items():
        if not keywords:
            # Avoid dividing by zero for an empty keyword list.
            scores[criterion] = 0.0
            continue
        # Keywords must be lower-cased too; otherwise entries such as "SVM"
        # or "I would" can never match the lower-cased response.
        hits = sum(1 for keyword in keywords if keyword.lower() in text)
        scores[criterion] = hits / len(keywords)
    return scores

# Function to rank candidates by average score
def rank_candidates(candidates):
    """Attach an ``avg_score`` to every candidate and return them best-first.

    Each candidate dict must carry a ``'scores'`` mapping. Its values are
    averaged and stored on the same dict under ``'avg_score'`` (the input
    dicts are modified in place). The returned list is a new list holding
    those same dicts in descending ``avg_score`` order; ties keep their
    original relative order.
    """
    for entry in candidates:
        per_criterion = list(entry['scores'].values())
        entry['avg_score'] = np.mean(per_criterion)

    def by_average(entry):
        return entry['avg_score']

    ordered = list(candidates)
    ordered.sort(key=by_average, reverse=True)
    return ordered

# Function to extract audio from the video and perform transcription using Whisper
def transcribe_video(video_file):
    """Transcribe the speech in a video and return the text.

    The video bytes are copied to a temporary file, the audio track is
    extracted to a temporary WAV file, and that WAV is transcribed with the
    module-level ``whisper_model``. Both temporary files are removed before
    returning, including when a step fails.

    Args:
        video_file: A binary file-like object positioned at the start of the
            video data; only ``read()`` is called on it.

    Returns:
        The transcribed text (the ``'text'`` field of Whisper's result).

    Raises:
        ValueError: If the video contains no audio track.
    """
    temp_paths = []
    try:
        # Save the uploaded file to a temporary location
        with NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
            temp_paths.append(temp_video_file.name)
            temp_video_file.write(video_file.read())
        temp_video_path = temp_video_file.name

        # Reserve a unique audio path so concurrent sessions never share (or
        # delete) the same file in the working directory.
        with NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
            temp_paths.append(temp_audio_file.name)
        audio_path = temp_audio_file.name

        # Load the video and extract audio; always close the clip so its
        # reader resources are released and the temp file can be deleted.
        video = VideoFileClip(temp_video_path)
        try:
            if video.audio is None:
                raise ValueError("The video has no audio track to transcribe.")
            video.audio.write_audiofile(audio_path)
        finally:
            video.close()

        # Reuse the model loaded once at module level rather than reloading
        # it for every video.
        transcription = whisper_model.transcribe(audio_path)
    finally:
        for path in temp_paths:
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

    return transcription['text']


st.title("AI Role Candidate Screening via Video Interview")

default_videos = ["Unlocking AI_ Insights from a Machine Learning Engineer.mp4", "Navigating Ethical Challenges in AI.mp4"]


num_candidates = st.number_input("Enter the number of candidates:", min_value=1, max_value=10, value=1)

# Streamlit re-executes this script on every widget interaction (including the
# "Analyze" button), so finished transcripts are kept in the session state and
# each video is transcribed only once per session.
if "transcripts" not in st.session_state:
    st.session_state["transcripts"] = {}
transcript_cache = st.session_state["transcripts"]

# One entry per candidate whose video has been transcribed:
# {"name": <display name>, "response": <transcript text>}
mock_interviews = []
for i in range(num_candidates):
    st.write(f"### Candidate {i+1}")

    # Provide options to either upload a video or use a default video
    use_default = st.checkbox(f"Use default video for Candidate {i+1}?", key=f"default_{i}")

    if use_default:
        video_file_path = default_videos[i % len(default_videos)]  # Cycle through default videos
        st.write(f"Using default video: {video_file_path}")
        cache_key = ("default", video_file_path)
        if cache_key not in transcript_cache:
            try:
                video_file = open(video_file_path, "rb")  # Open the default video file as binary
            except OSError as exc:
                # A missing/unreadable bundled video should not crash the app.
                st.error(f"Could not open default video '{video_file_path}': {exc}")
                continue
            # The context manager closes the handle once transcription is done.
            with video_file:
                st.write(f"Processing video for Candidate {i+1}...")
                transcript_cache[cache_key] = transcribe_video(video_file)
    else:
        video_file = st.file_uploader(f"Upload interview video for Candidate {i+1}:", type=["mp4", "mov", "avi"], key=f"video_{i}")
        if video_file is None:
            # Nothing uploaded yet for this candidate.
            continue
        # Uploads are identified by name and size; two different files sharing
        # both would reuse the first transcript.
        cache_key = ("upload", video_file.name, getattr(video_file, "size", None))
        if cache_key not in transcript_cache:
            st.write(f"Processing video for Candidate {i+1}...")
            transcript_cache[cache_key] = transcribe_video(video_file)

    transcription = transcript_cache[cache_key]
    st.write(f"Transcript for Candidate {i+1}: {transcription}")
    mock_interviews.append({"name": f"Candidate {i+1}", "response": transcription})

# Score, encode and rank the transcribed candidates once "Analyze" is pressed
analyze_clicked = st.button('Analyze Responses')

if analyze_clicked and not mock_interviews:
    # No transcript is available yet, so there is nothing to rank
    st.write("Please upload videos for all candidates.")
elif analyze_clicked:
    # Attach keyword scores and the BERT embedding to every candidate record
    scored_candidates = []
    for interview in mock_interviews:
        transcript = interview['response']
        interview['scores'] = score_response(transcript, criteria)
        interview['encoded'] = encode_response(transcript)
        scored_candidates.append(interview)

    # Best average score first
    ranked_candidates = rank_candidates(scored_candidates)

    # Show one summary per candidate, in rank order
    st.write("### Candidate Rankings")
    for position, ranked in enumerate(ranked_candidates, start=1):
        name = ranked['name']
        average = ranked['avg_score']
        breakdown = ranked['scores']
        st.write(f"**Rank {position}: {name}**")
        st.write(f"Average Score: {average:.2f}")
        st.write(f"Scores: {breakdown}")