Spaces:
Running
Running
import os | |
import json | |
from collections import deque | |
from dotenv import load_dotenv | |
import gradio as gr | |
from langchain_openai import ChatOpenAI | |
from langchain.schema import HumanMessage, SystemMessage | |
from openai import OpenAI | |
import tempfile | |
import time | |
# Load environment variables | |
load_dotenv() | |
# Function to read questions from JSON | |
def read_questions_from_json(file_path): | |
if not os.path.exists(file_path): | |
raise FileNotFoundError(f"The file '{file_path}' does not exist.") | |
with open(file_path, 'r') as f: | |
questions_list = json.load(f) | |
if not questions_list: | |
raise ValueError("The JSON file is empty or has invalid content.") | |
return questions_list | |
# Function to convert text to speech | |
def convert_text_to_speech(text): | |
start_time = time.time() | |
try: | |
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) | |
response = client.audio.speech.create(model="tts-1", voice="alloy", input=text) | |
# Save the audio stream to a temporary file | |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file: | |
for chunk in response.iter_bytes(): | |
tmp_file.write(chunk) | |
temp_audio_path = tmp_file.name | |
print(f"DEBUG - Text-to-speech conversion time: {time.time() - start_time:.2f} seconds") | |
return temp_audio_path | |
except Exception as e: | |
print(f"Error during text-to-speech conversion: {e}") | |
return None | |
# Function to transcribe audio | |
def transcribe_audio(audio_file_path): | |
start_time = time.time() | |
try: | |
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) | |
with open(audio_file_path, "rb") as audio_file: | |
transcription = client.audio.transcriptions.create( | |
model="whisper-1", | |
file=audio_file | |
) | |
print(f"DEBUG - Audio transcription time: {time.time() - start_time:.2f} seconds") | |
return transcription.text | |
except Exception as e: | |
print(f"Error during audio transcription: {e}") | |
return None | |
# Conduct interview and handle user input | |
def conduct_interview(questions, language="English", history_limit=5): | |
start_time = time.time() | |
openai_api_key = os.getenv("OPENAI_API_KEY") | |
if not openai_api_key: | |
raise RuntimeError("OpenAI API key not found. Please add it to your .env file as OPENAI_API_KEY.") | |
chat = ChatOpenAI( | |
openai_api_key=openai_api_key, model="gpt-4o", temperature=0.7, max_tokens=750 | |
) | |
conversation_history = deque(maxlen=history_limit) | |
system_prompt = (f"You are Sarah, an empathetic HR interviewer conducting a technical interview in {language}. " | |
"Respond to user follow-up questions politely and concisely. If the user is confused, provide clear clarification.") | |
interview_data = [] | |
current_question_index = [0] # Use a list to hold the index | |
is_interview_finished = False | |
initial_message = ("👋 Hi there, I'm Sarah, your friendly AI HR assistant! " | |
"I'll guide you through a series of interview questions to learn more about you. " | |
"Take your time and answer each question thoughtfully.") | |
final_message = "That wraps up our interview. Thank you so much for your responses—it's been great learning more about you!" | |
print(f"DEBUG - conduct_interview setup time: {time.time() - start_time:.2f} seconds") | |
def interview_step(user_input, audio_input, history): | |
nonlocal current_question_index | |
nonlocal is_interview_finished | |
step_start_time = time.time() | |
# Transcribe audio input if provided | |
if audio_input: | |
user_input = transcribe_audio(audio_input) | |
print("Transcription:", user_input) | |
if user_input.lower() in ["exit", "quit"]: | |
history.append({"role": "assistant", "content": "The interview has ended at your request. Thank you for your time!"}) | |
is_interview_finished = True | |
return history, "", None | |
# If interview is finished, do nothing | |
if is_interview_finished: | |
return history, "", None | |
question_text = questions[current_question_index[0]] | |
history_content = "\n".join([f"Q: {entry['question']}\nA: {entry['answer']}" for entry in conversation_history]) | |
combined_prompt = (f"{system_prompt}\n\nPrevious conversation history:\n{history_content}\n\n" | |
f"Current question: {question_text}\nUser's input: {user_input}\n\n" | |
"Respond in a warm and conversational way, offering natural follow-ups if needed.") | |
messages = [ | |
SystemMessage(content=system_prompt), | |
HumanMessage(content=combined_prompt) | |
] | |
chat_start_time = time.time() | |
response = chat.invoke(messages) | |
print(f"DEBUG - Chat response time: {time.time() - chat_start_time:.2f} seconds") | |
response_content = response.content.strip() | |
# Convert response to speech | |
audio_file_path = convert_text_to_speech(response_content) | |
conversation_history.append({"question": question_text, "answer": user_input}) | |
interview_data.append({"question": question_text, "answer": user_input}) | |
# Use the correct format for messages | |
history.append({"role": "user", "content": user_input}) | |
history.append({"role": "assistant", "content": response_content}) | |
if current_question_index[0] + 1 < len(questions): | |
current_question_index[0] += 1 | |
next_question = f"Alright, let's move on. {questions[current_question_index[0]]}" | |
next_question_audio_path = convert_text_to_speech(next_question) | |
history.append({"role": "assistant", "content": next_question}) | |
print(f"DEBUG - Interview step time: {time.time() - step_start_time:.2f} seconds") | |
return history, "", next_question_audio_path | |
else: | |
# Convert final message to speech and play it | |
final_message_audio_path = convert_text_to_speech(final_message) | |
history.append({"role": "assistant", "content": final_message}) | |
# Convert the last question to speech | |
last_question_audio_path = convert_text_to_speech(questions[current_question_index[0]]) | |
is_interview_finished = True | |
print(f"DEBUG - Interview step time: {time.time() - step_start_time:.2f} seconds") | |
return history, "", last_question_audio_path | |
return interview_step, initial_message, final_message | |
# Gradio interface | |
def main(): | |
QUESTIONS_FILE_PATH = "questions.json" # Ensure you have a questions.json file with your interview questions | |
try: | |
questions = read_questions_from_json(QUESTIONS_FILE_PATH) | |
interview_func, initial_message, final_message = conduct_interview(questions) | |
css = """ | |
.contain { display: flex; flex-direction: column; } | |
.gradio-container { height: 100vh !important; } | |
#component-0 { height: 100%; } | |
.chatbot { flex-grow: 1; overflow: auto; height: 100px; } | |
.chatbot .wrap.svelte-1275q59.wrap.svelte-1275q59 {flex-wrap : nowrap !important} | |
.user > div > .message {background-color : #dcf8c6 !important} | |
.bot > div > .message {background-color : #f7f7f8 !important} | |
""" | |
with gr.Blocks(css=css) as demo: | |
gr.Markdown(""" | |
<h1 style='text-align: center; margin-bottom: 1rem'>👋 Welcome to Your AI HR Interview Assistant</h1> | |
""") | |
start_btn = gr.Button("Start Interview", variant="primary") | |
gr.Markdown(""" | |
<p style='text-align: center; margin-bottom: 1rem'>I will ask you a series of questions. Please answer honestly and thoughtfully. When you are ready, click "Start Interview" to begin.</p> | |
""") | |
chatbot = gr.Chatbot(label="Interview Chat", elem_id="chatbot", height=650, type='messages') | |
audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record Your Answer") | |
user_input = gr.Textbox(label="Your Response", placeholder="Type your answer here or use the microphone...", lines=1) | |
audio_output = gr.Audio(label="Response Audio", autoplay=True) | |
with gr.Row(): | |
submit_btn = gr.Button("Submit", variant="primary") | |
clear_btn = gr.Button("Clear Chat") | |
def start_interview(): | |
history = [] | |
# Convert and play initial message | |
start_time = time.time() | |
initial_audio_path = convert_text_to_speech(initial_message) | |
# Combine initial message and first question | |
first_question = "Let's begin! Here's your first question: " + questions[0] | |
combined_message = initial_message + " " + first_question | |
# Convert combined message to speech | |
combined_audio_path = convert_text_to_speech(combined_message) | |
history.append({"role": "assistant", "content": combined_message}) | |
print(f"DEBUG - Initial message audio time: {time.time() - start_time:.2f} seconds") | |
return history, "", combined_audio_path | |
def clear_interview(): | |
# Reset the interview state | |
interview_func, initial_message, final_message = conduct_interview(questions) | |
return [], "", None | |
def interview_step_wrapper(user_response, audio_response, history): | |
history, _, audio_path = interview_func(user_response, audio_response, history) | |
time.sleep(0.1) # Reduced delay | |
return history, "", audio_path | |
def on_enter_submit(history, user_response): | |
if not user_response.strip(): | |
return history, "", None | |
history, _, audio_path = interview_step_wrapper(user_response, None, history) | |
time.sleep(0.1) # Reduced delay | |
return history, "", audio_path | |
audio_input.stop_recording(interview_step_wrapper, inputs=[user_input, audio_input, chatbot], outputs=[chatbot, user_input, audio_output]) | |
start_btn.click(start_interview, inputs=[], outputs=[chatbot, user_input, audio_output]) | |
submit_btn.click(interview_step_wrapper, inputs=[user_input, audio_input, chatbot], outputs=[chatbot, user_input, audio_output]) | |
user_input.submit(on_enter_submit, inputs=[chatbot, user_input], outputs=[chatbot, user_input, audio_output]) | |
clear_btn.click(clear_interview, inputs=[], outputs=[chatbot, user_input, audio_output]) | |
demo.launch() | |
except Exception as e: | |
print(f"Error: {e}") | |
if __name__ == "__main__": | |
main() |