"""Streamlit app that screens AI-role candidates from interview videos.

For every candidate a video is transcribed with Whisper, the transcript is
scored against keyword-based criteria and embedded with BERT, and the
candidates are finally ranked by their average criterion score.
"""

import os
from tempfile import NamedTemporaryFile

import numpy as np
import streamlit as st
import torch
import whisper
from moviepy.editor import VideoFileClip
from transformers import BertModel, BertTokenizer

# Load the pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

# Load Whisper model for transcription
whisper_model = whisper.load_model("base")

# Define criteria for scoring responses
criteria = {
    "technical": ["machine learning", "data", "preprocess", "decision tree", "SVM", "neural network", "hyperparameter"],
    "problem_solving": ["cross-validation", "grid search", "evaluate", "optimize", "performance"],
    "communication": ["I would", "then", "and", "also"],
}


def encode_response(response):
    """Encode a response with BERT by mean-pooling the last hidden state.

    Args:
        response: Text to embed. The tokenizer truncates over-long input.

    Returns:
        A NumPy array with the token-averaged last hidden state.
    """
    inputs = tokenizer(response, return_tensors='pt', padding=True, truncation=True)
    # Pure inference: no need to record an autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).squeeze().detach().numpy()


def score_response(response, criteria):
    """Score a response against keyword lists.

    Each criterion's score is the fraction of its keywords that occur in the
    response. Matching is a case-insensitive substring test, so a keyword
    also matches when it appears inside a longer word.

    Args:
        response: Text to score.
        criteria: Mapping of criterion name to a list of keywords.

    Returns:
        Dict mapping each criterion name to a score between 0 and 1.
    """
    text = response.lower()
    scores = {}
    for criterion, keywords in criteria.items():
        if not keywords:
            # Nothing to look for; avoid dividing by zero.
            scores[criterion] = 0.0
            continue
        # Keywords are lower-cased as well, otherwise entries such as "SVM"
        # could never match the lower-cased response.
        hits = sum(1 for keyword in keywords if keyword.lower() in text)
        scores[criterion] = hits / len(keywords)
    return scores


def rank_candidates(candidates):
    """Rank candidates by the mean of their criterion scores.

    Adds an ``'avg_score'`` entry to every candidate dict (in place).

    Args:
        candidates: List of dicts, each with a ``'scores'`` dict.

    Returns:
        A new list of the same dicts, highest average score first.
    """
    for candidate in candidates:
        values = list(candidate['scores'].values())
        # An empty score dict averages to 0.0 rather than NaN.
        candidate['avg_score'] = float(np.mean(values)) if values else 0.0
    return sorted(candidates, key=lambda x: x['avg_score'], reverse=True)


def transcribe_video(video_file):
    """Extract the audio track of a video and transcribe it with Whisper.

    Args:
        video_file: Binary file-like object (anything with ``read()``)
            holding the video.

    Returns:
        The transcribed text.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Rewind in case the stream has already been read once.
    if hasattr(video_file, "seek"):
        video_file.seek(0)

    temp_paths = []
    try:
        # Save the incoming video to a temporary location.
        with NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
            temp_paths.append(temp_video_file.name)
            temp_video_file.write(video_file.read())
        video_path = temp_video_file.name

        # Reserve a unique path for the audio so that concurrent sessions
        # do not overwrite each other's file.
        with NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
            temp_paths.append(temp_audio_file.name)
        audio_path = temp_audio_file.name

        # Load the video and extract audio.
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("The video has no audio track to transcribe.")
            video.audio.write_audiofile(audio_path)
        finally:
            # Release the reader before the temp files are deleted.
            video.close()

        # Reuse the module-level Whisper model instead of reloading it.
        transcription = whisper_model.transcribe(audio_path)
        return transcription['text']
    finally:
        # Clean up the temp files whether or not transcription succeeded.
        for path in temp_paths:
            try:
                os.remove(path)
            except FileNotFoundError:
                pass


st.title("AI Role Candidate Screening via Video Interview")

default_videos = [
    "Unlocking AI_ Insights from a Machine Learning Engineer.mp4",
    "Navigating Ethical Challenges in AI.mp4",
]

num_candidates = st.number_input("Enter the number of candidates:", min_value=1, max_value=10, value=1)

mock_interviews = []
for i in range(num_candidates):
    st.write(f"### Candidate {i+1}")

    # Provide options to either upload a video or use a default video
    use_default = st.checkbox(f"Use default video for Candidate {i+1}?", key=f"default_{i}")

    if use_default:
        video_file_path = default_videos[i % len(default_videos)]  # Cycle through default videos
        st.write(f"Using default video: {video_file_path}")
        if not os.path.isfile(video_file_path):
            st.error(f"Default video not found: {video_file_path}")
            continue
        st.write(f"Processing video for Candidate {i+1}...")
        # The context manager closes the file once it has been transcribed.
        with open(video_file_path, "rb") as video_file:
            transcription = transcribe_video(video_file)
    else:
        video_file = st.file_uploader(
            f"Upload interview video for Candidate {i+1}:",
            type=["mp4", "mov", "avi"],
            key=f"video_{i}",
        )
        if not video_file:
            continue
        st.write(f"Processing video for Candidate {i+1}...")
        transcription = transcribe_video(video_file)

    st.write(f"Transcript for Candidate {i+1}: {transcription}")
    mock_interviews.append({"name": f"Candidate {i+1}", "response": transcription})

# Analyze the candidates when the user clicks the "Analyze" button
if st.button('Analyze Responses'):
    if mock_interviews:
        # Encode and score each candidate
        scored_candidates = []
        for candidate in mock_interviews:
            candidate['scores'] = score_response(candidate['response'], criteria)
            candidate['encoded'] = encode_response(candidate['response'])
            scored_candidates.append(candidate)

        # Rank the candidates based on scores
        ranked_candidates = rank_candidates(scored_candidates)

        # Display the results
        st.write("### Candidate Rankings")
        for rank, candidate in enumerate(ranked_candidates, 1):
            st.write(f"**Rank {rank}: {candidate['name']}**")
            st.write(f"Average Score: {candidate['avg_score']:.2f}")
            st.write(f"Scores: {candidate['scores']}")
    else:
        st.write("Please upload videos for all candidates.")