Hyma7 committed on
Commit
ec7352f
·
verified ·
1 Parent(s): b7abf64

Rename streamlitApp.py to app.py

Browse files
Files changed (1) hide show
  1. streamlitApp.py → app.py +103 -73
streamlitApp.py → app.py RENAMED
@@ -1,73 +1,103 @@
1
- import streamlit as st
2
- from transformers import BertTokenizer, BertModel
3
- import torch
4
- import numpy as np
5
-
6
- # Load the pre-trained BERT model and tokenizer
7
# The tokenizer and the encoder are both built from the same
# 'bert-base-uncased' checkpoint, tokenizer first.
tokenizer, model = (
    loader.from_pretrained('bert-base-uncased')
    for loader in (BertTokenizer, BertModel)
)

# Scoring rubric: each criterion name maps to the phrases that are
# searched for in a candidate's response.
criteria = dict(
    technical=[
        "machine learning",
        "data",
        "preprocess",
        "decision tree",
        "SVM",
        "neural network",
        "hyperparameter",
    ],
    problem_solving=[
        "cross-validation",
        "grid search",
        "evaluate",
        "optimize",
        "performance",
    ],
    communication=["I would", "then", "and", "also"],
)
16
-
17
- # Function to encode a response using BERT
18
def encode_response(response):
    """Embed *response* with the module-level BERT tokenizer and model.

    The text is tokenized (padded and truncated), passed through the
    model, and the last hidden state is averaged over dimension 1
    (presumably the token axis -- confirm against the model docs).

    Returns:
        The squeezed, detached result as a NumPy array.
    """
    encoded = tokenizer(response, return_tensors='pt', padding=True, truncation=True)
    hidden_states = model(**encoded).last_hidden_state
    pooled = hidden_states.mean(dim=1).squeeze()
    return pooled.detach().numpy()
22
-
23
- # Function to score the response based on predefined criteria
24
def score_response(response, criteria):
    """Score *response* against each criterion's keyword list.

    Args:
        response: The candidate's answer text.
        criteria: Mapping of criterion name to a list of keyword phrases.

    Returns:
        A dict mapping each criterion name to the fraction of its
        keywords that occur in *response* as exact, case-sensitive
        substrings.
    """
    return {
        name: sum(phrase in response for phrase in phrases) / len(phrases)
        for name, phrases in criteria.items()
    }
29
-
30
- # Function to rank candidates by average score
31
def rank_candidates(candidates):
    """Attach an average score to every candidate and rank them.

    Each candidate dict gains an ``'avg_score'`` entry (the mean of the
    values in its ``'scores'`` dict); the dicts are updated in place.

    Returns:
        A new list holding the same candidate dicts, highest
        ``'avg_score'`` first.
    """
    for entry in candidates:
        per_criterion = list(entry['scores'].values())
        entry['avg_score'] = np.mean(per_criterion)
    return sorted(candidates, key=lambda entry: entry['avg_score'], reverse=True)
37
-
38
- # Streamlit app
39
st.title("AI Role Candidate Screening")

# How many candidate forms to render (1 to 10, starting at 3).
num_candidates = st.number_input("Enter the number of candidates:", min_value=1, max_value=10, value=3)

# Render a name box and a response box per candidate; only pairs where
# both fields are filled in are collected for analysis.
mock_interviews = []
for i in range(num_candidates):
    name = st.text_input(f"Enter the name of Candidate {i+1}:", key=f"name_{i}")
    response = st.text_area(f"Enter the interview response for {name}:", key=f"response_{i}")
    if not (name and response):
        continue
    mock_interviews.append({"name": name, "response": response})

# Scoring, encoding and ranking only happen once the button is pressed.
if st.button('Analyze Responses'):
    if not mock_interviews:
        st.write("Please enter candidate responses.")
    else:
        # Annotate every collected candidate with keyword scores and an embedding.
        for candidate in mock_interviews:
            answer = candidate['response']
            candidate['scores'] = score_response(answer, criteria)
            candidate['encoded'] = encode_response(answer)
        scored_candidates = list(mock_interviews)

        ranked_candidates = rank_candidates(scored_candidates)

        # Show the ranking, best candidate first.
        st.write("### Candidate Rankings")
        for rank, candidate in enumerate(ranked_candidates, 1):
            st.write(f"**Rank {rank}: {candidate['name']}**")
            st.write(f"Average Score: {candidate['avg_score']:.2f}")
            st.write(f"Scores: {candidate['scores']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import whisper
3
+ from moviepy.editor import VideoFileClip
4
+ from tempfile import NamedTemporaryFile
5
+ import numpy as np
6
+ from transformers import BertTokenizer, BertModel
7
+ import torch
8
+ import os
9
+
10
+ # Load the pre-trained BERT model and tokenizer
11
@st.cache_resource
def _load_models():
    """Load the BERT tokenizer, BERT model and Whisper model once.

    Streamlit re-executes this script on every widget interaction;
    caching the loaded objects avoids reloading them on each rerun.

    Returns:
        A ``(tokenizer, model, whisper_model)`` tuple.
    """
    return (
        BertTokenizer.from_pretrained('bert-base-uncased'),
        BertModel.from_pretrained('bert-base-uncased'),
        whisper.load_model("base"),
    )


# Module-level handles used by the encoding and transcription helpers.
tokenizer, model, whisper_model = _load_models()

# Define criteria for scoring responses: each criterion name maps to the
# keyword phrases that are searched for in a transcript.
criteria = {
    "technical": ["machine learning", "data", "preprocess", "decision tree", "SVM", "neural network", "hyperparameter"],
    "problem_solving": ["cross-validation", "grid search", "evaluate", "optimize", "performance"],
    "communication": ["I would", "then", "and", "also"],
}
23
+
24
+ # Function to encode a response using BERT
25
def encode_response(response):
    """Embed *response* with the module-level BERT tokenizer and model.

    The text is tokenized (padded and truncated), passed through the
    model, and the last hidden state is averaged over dimension 1
    (presumably the token axis -- confirm against the model docs).

    Args:
        response: The text to embed.

    Returns:
        The squeezed pooled output as a NumPy array.
    """
    inputs = tokenizer(response, return_tensors='pt', padding=True, truncation=True)
    # Inference only: skip autograd bookkeeping so no graph is kept alive.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
29
+
30
+ # Function to score the response based on predefined criteria
31
def score_response(response, criteria):
    """Score *response* against each criterion's keyword list.

    Matching is a case-insensitive substring search: both the response
    and every keyword are lower-cased before comparison, so mixed-case
    keywords such as "SVM" or "I would" can match.

    Args:
        response: The candidate's answer text.
        criteria: Mapping of criterion name to a list of keyword phrases.

    Returns:
        A dict mapping each criterion name to the fraction (0.0 to 1.0)
        of its keywords found in *response*. A criterion with no
        keywords scores 0.0.
    """
    text = response.lower()  # lower-case once instead of once per keyword
    scores = {}
    for criterion, keywords in criteria.items():
        if not keywords:
            # Nothing to look for; avoid dividing by zero.
            scores[criterion] = 0.0
            continue
        hits = sum(keyword.lower() in text for keyword in keywords)
        scores[criterion] = hits / len(keywords)
    return scores
36
+
37
+ # Function to rank candidates by average score
38
def rank_candidates(candidates):
    """Attach an average score to every candidate and rank them.

    Each candidate dict gains an ``'avg_score'`` entry holding the mean
    of the values in its ``'scores'`` dict; the dicts are updated in
    place. A candidate with no scores gets 0.0 rather than NaN, which
    would make the sort order unreliable.

    Args:
        candidates: List of dicts, each with a ``'scores'`` mapping.

    Returns:
        A new list holding the same candidate dicts, highest
        ``'avg_score'`` first.
    """
    for candidate in candidates:
        values = list(candidate['scores'].values())
        candidate['avg_score'] = np.mean(values) if values else 0.0
    return sorted(candidates, key=lambda c: c['avg_score'], reverse=True)
44
+
45
+ # Function to extract audio from the video and perform transcription using Whisper
46
def transcribe_video(video_file):
    """Transcribe the speech in an uploaded video with Whisper.

    The upload is written to a temporary video file, its audio track is
    exported to a temporary WAV file, and that file is transcribed with
    the module-level ``whisper_model``. Both temporary files are removed
    even when extraction or transcription fails.

    Args:
        video_file: A file-like object with a ``read()`` method that
            returns the video bytes.

    Returns:
        The ``'text'`` entry of the Whisper transcription result.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Save the uploaded bytes to disk so the video can be opened by path.
    with NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
        temp_video_file.write(video_file.read())
        temp_video_path = temp_video_file.name

    # Reserve a unique audio path; a fixed name such as "audio.wav" would
    # collide when several uploads are processed at the same time.
    with NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
        temp_audio_path = temp_audio_file.name

    try:
        video = VideoFileClip(temp_video_path)
        try:
            if video.audio is None:
                raise ValueError("The uploaded video has no audio track to transcribe.")
            video.audio.write_audiofile(temp_audio_path)
        finally:
            # Release the reader's handles before the file is deleted.
            video.close()

        # Reuse the model loaded at module level instead of reloading it per call.
        transcription = whisper_model.transcribe(temp_audio_path)
    finally:
        # Clean up temporary files on success and on failure alike.
        for path in (temp_audio_path, temp_video_path):
            if os.path.exists(path):
                os.remove(path)

    return transcription['text']
66
+
67
+ # Streamlit app
68
st.title("AI Role Candidate Screening via Video Interview")

# Input for the number of candidates
num_candidates = st.number_input("Enter the number of candidates:", min_value=1, max_value=10, value=1)

# Streamlit re-executes this script on every interaction (including the
# "Analyze" click), so finished transcripts are kept in session state and
# each uploaded video is transcribed only once.
if "transcripts" not in st.session_state:
    st.session_state["transcripts"] = {}
transcript_cache = st.session_state["transcripts"]

mock_interviews = []
for i in range(num_candidates):
    video_file = st.file_uploader(f"Upload interview video for Candidate {i+1}:", type=["mp4", "mov", "avi"], key=f"video_{i}")
    if not video_file:
        continue

    # Name plus size identifies an upload well enough to reuse its transcript.
    cache_key = (video_file.name, video_file.size)
    if cache_key not in transcript_cache:
        st.write(f"Processing video for Candidate {i+1}...")
        transcript_cache[cache_key] = transcribe_video(video_file)
    transcription = transcript_cache[cache_key]

    st.write(f"Transcript for Candidate {i+1}: {transcription}")
    mock_interviews.append({"name": f"Candidate {i+1}", "response": transcription})

# Analyze the candidates when the user clicks the "Analyze" button
if st.button('Analyze Responses'):
    if mock_interviews:
        # Encode and score each candidate
        scored_candidates = []
        for candidate in mock_interviews:
            candidate['scores'] = score_response(candidate['response'], criteria)
            candidate['encoded'] = encode_response(candidate['response'])
            scored_candidates.append(candidate)

        # Rank the candidates based on scores
        ranked_candidates = rank_candidates(scored_candidates)

        # Display the results, best candidate first
        st.write("### Candidate Rankings")
        for rank, candidate in enumerate(ranked_candidates, 1):
            st.write(f"**Rank {rank}: {candidate['name']}**")
            st.write(f"Average Score: {candidate['avg_score']:.2f}")
            st.write(f"Scores: {candidate['scores']}")
    else:
        st.write("Please upload videos for all candidates.")