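# Spaces: Running
# NOTE(review): the line above appears to be a status banner captured from the
# hosting page together with the source; it is not part of the program.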
import re
import subprocess
import sys

import PyPDF2
import pytextrank
import spacy
import streamlit as st
from docx import Document
from langchain.memory import ConversationBufferMemory
from langchain_core.messages import SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_google_genai import ChatGoogleGenerativeAI
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
# Function to check and download spaCy model
def ensure_spacy_model(model_name="en_core_web_sm"):
    """Make sure the spaCy pipeline package *model_name* can be loaded.

    The model is loaded once as a probe.  If that raises ``OSError``, the
    package is downloaded with ``<interpreter> -m spacy download`` and the
    probe is repeated.

    Args:
        model_name: Name of the spaCy pipeline package to check.

    Raises:
        OSError: If the download command fails, or if the model still cannot
            be loaded after a successful download.
    """
    try:
        spacy.load(model_name)
    except OSError:
        # Run the download with the interpreter executing this app rather
        # than whatever "python" resolves to on PATH, so the package is
        # installed into the environment that will load it.
        command = [sys.executable, "-m", "spacy", "download", model_name]
        try:
            subprocess.run(command, check=True)
        except subprocess.CalledProcessError as err:
            # Keep the OSError type the caller would otherwise have seen from
            # the second load, but say what actually went wrong.
            raise OSError(
                f"Could not download spaCy model {model_name!r}"
            ) from err
        spacy.load(model_name)
# Function to extract text from PDF
def extract_text_from_pdf(uploaded_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        uploaded_file: Whatever ``PyPDF2.PdfReader`` accepts as its source
            (the app passes the object returned by ``st.file_uploader``).

    Returns:
        The page texts joined without a separator; ``""`` when no page
        yields any text.
    """
    reader = PyPDF2.PdfReader(uploaded_file)
    # Defensive: a page that yields nothing (None or "") contributes an empty
    # string instead of breaking the concatenation with a TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to extract text from Word document
def extract_text_from_word(uploaded_file):
    """Return the paragraph text of a Word document, one paragraph per line.

    Args:
        uploaded_file: Source passed straight to ``Document(...)``.

    Returns:
        Each entry of ``doc.paragraphs`` followed by a newline, in document
        order; ``""`` when the document has no paragraphs.
    """
    paragraphs = Document(uploaded_file).paragraphs
    return "".join(f"{para.text}\n" for para in paragraphs)
# Function to summarize text
def summarize_text(text, max_length=1000, min_length=30):
    """Summarize *text* with the ``facebook/bart-large-cnn`` pipeline.

    Args:
        text: Text to summarize.
        max_length: Upper bound passed to the summarizer; capped at 1000.
        min_length: Lower bound passed to the summarizer.

    Returns:
        The ``summary_text`` of the first result.  Any exception raised while
        building or running the pipeline is not propagated; instead the
        string ``"Error in summarization: <exception>"`` is returned.
    """
    capped_length = min(max_length, 1000)  # never ask for more than 1000
    try:
        # The pipeline is created on every call.
        bart = pipeline("summarization", model="facebook/bart-large-cnn")
        outputs = bart(
            text,
            max_length=capped_length,
            min_length=min_length,
            do_sample=False,
        )
        # Guard clause: anything but a non-empty list is an unexpected shape.
        if not isinstance(outputs, list) or len(outputs) == 0:
            raise ValueError("Unexpected summarizer output format")
        return outputs[0]['summary_text']
    except Exception as err:
        return f"Error in summarization: {err}"
# Function to extract keywords using spaCy and PyTextRank
def extract_keywords(text, top_n=15):
    """Return up to *top_n* key phrases of *text* as ranked by PyTextRank.

    Args:
        text: Text to analyse.
        top_n: Maximum number of phrases to return.

    Returns:
        The ``text`` of the first *top_n* entries of ``doc._.phrases``, in
        the order the ``textrank`` component provides them.
    """
    model_name = "en_core_web_sm"
    ensure_spacy_model(model_name)

    # A fresh pipeline is loaded per call, so the "textrank" component is
    # added exactly once to each pipeline instance.
    textrank_nlp = spacy.load(model_name)
    textrank_nlp.add_pipe("textrank", last=True)

    ranked_phrases = textrank_nlp(text)._.phrases
    return [phrase.text for phrase in ranked_phrases[:top_n]]
# "N. " marker that introduces each question.
_QUESTION_NUMBER = re.compile(r'\d+\.\s+')
# "- X)" option bullet, at the start of the text or after any whitespace
# (so it matches both inline " - A) ..." and line-initial "\n- A) ..." forms).
# The label is captured so re.split() returns it alongside the option text.
_OPTION_MARKER = re.compile(r'(?:^|\s)-\s*([A-Z])\)\s*')


def parse_mcq_questions(mcq_list):
    """Parse numbered MCQ text into question/option dictionaries.

    Expected shape of each question (options may be inline or on their own
    lines, with or without indentation)::

        1. Question text...
        - A) Option 1
        - B) Option 2

    Only a dash followed by a single upper-case letter and ``)`` counts as an
    option bullet, so a plain " - " inside a question or an option is kept as
    part of that text.

    Args:
        mcq_list: The raw MCQ text.  Anything before the first ``N. `` marker
            is ignored.

    Returns:
        A list of ``{'question': str, 'options': {label: text}}`` dicts in
        input order; ``[]`` for empty or ``None`` input.  If a label occurs
        twice within one question, the last occurrence wins.
    """
    if not mcq_list:
        return []

    parsed_questions = []
    # Index 0 is whatever preceded the first question number; skip it.
    for chunk in _QUESTION_NUMBER.split(mcq_list)[1:]:
        # With one capturing group, split() yields
        # [question, label, text, label, text, ...].
        question, *labelled = _OPTION_MARKER.split(chunk.strip())
        options = {
            label: body.strip()
            for label, body in zip(labelled[::2], labelled[1::2])
        }
        parsed_questions.append({
            'question': question.strip(),
            'options': options
        })
    return parsed_questions
# Function to generate MCQs using LLM
def generate_mcqs(keywords):
    """Ask the LLM chain for ten multiple-choice questions about *keywords*.

    The prompt is sent through the module-level ``chain`` and the exchange
    (prompt and response) is recorded in the module-level ``memory``.

    Args:
        keywords: Skills to build questions from; interpolated into the
            prompt with its default string formatting.

    Returns:
        Whatever ``chain.invoke`` returns for the prompt.
    """
    # NOTE: the prompt lines are kept flush-left on purpose; any indentation
    # here would become part of the text sent to the model.
    prompt = f"""
You are an advanced AI model trained to generate high-quality multiple-choice questions (MCQs).
Based on the provided list of skills: {keywords}, create **exactly 10 MCQs**. Each MCQ should focus on most important concepts related to the internal topics of each skill.
For example, if the keyword is \"Python,\" the questions should be derived from core Python concepts, like data structures, syntax, or libraries.
The MCQs should follow this structure:
1. A clear and concise important question based on a topic within the skill.
2. Four options (labeled as A, B, C, and D).
3. Only one correct answer per question, with the other options serving as plausible distractors.
Do not provide any other information, explanations, or extra text. Output **only** the 10 MCQs in proper structure, like this:
1. Question text...
- A) Option 1
- B) Option 2
- C) Option 3
- D) Option 4
2. Question text...
- A) Option 1
- B) Option 2
- C) Option 3
- D) Option 4
Continue this format for all 10 questions.
"""
    request = {"human_input": prompt}
    mcq_text = chain.invoke(request)
    memory.save_context(request, {"output": mcq_text})
    return mcq_text
# Function to evaluate MCQ answers
def evaluate_mcqs(mcq_list, answers):
    """Ask the LLM chain to mark *answers* against the given MCQs.

    The questions are embedded in the prompt itself, so the evaluation does
    not depend on them still being present in the chat history.  The exchange
    is recorded in the module-level ``memory``.

    Args:
        mcq_list: The MCQs that were answered (raw text or any object whose
            string form lists them).  When empty or ``None`` the questions
            section is omitted from the prompt.
        answers: The user's answers; interpolated into the prompt with its
            default string formatting.

    Returns:
        Whatever ``chain.invoke`` returns for the prompt.
    """
    # Previously mcq_list was accepted but never sent to the model.
    questions_section = f"MCQs being evaluated:\n{mcq_list}\n" if mcq_list else ""
    # NOTE: the prompt lines are kept flush-left on purpose; any indentation
    # here would become part of the text sent to the model.
    query = {"human_input": f"""
You are an advanced AI model trained to evaluate answers for high-quality multiple-choice questions (MCQs). Act as an expert professional in all relevant skills and concepts, analyzing the user's answers in detail. Follow these instructions:
1. Evaluate the provided answers {answers} against the correct answers for the MCQs.
2. Award 1 mark for each correct answer. Determine if each answer is correct or incorrect.
3. For incorrect answers:
- Analyze deeply to identify the specific concepts or subtopics within the skill where the user is struggling.
- Provide a focused list of concepts the user needs to improve on, derived from the incorrect answers.
4. At the end of the evaluation, output:
- Total marks scored (out of 10).
- A detailed and analyzed one by one list of concepts to focus on, ensuring they address the root areas of misunderstanding or lack of knowledge.
Output **only** the following information:
- Total marks scored: X/10
- Concepts to focus on: [Provide an analyzed and specific list of concepts derived from incorrect answers]
{questions_section}"""}
    response = chain.invoke(query)
    memory.save_context(query, {"output": response})
    return response
# Initialize Google Generative AI chat model
def initialize_chat_model():
    """Build the Gemini chat model used by the app.

    The API key is read from ``key.txt`` (resolved against the current
    working directory) with surrounding whitespace removed.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance configured with the settings
        below.
    """
    with open("key.txt", "r") as key_file:
        api_key = key_file.read().strip()

    # Collected in one place so the tuning knobs are easy to scan.
    model_settings = dict(
        google_api_key=api_key,
        model="gemini-1.5-pro-latest",
        temperature=0.4,
        max_tokens=2000,
        timeout=120,
        max_retries=5,
        top_p=0.9,
        top_k=40,
        presence_penalty=0.6,
        frequency_penalty=0.3,
    )
    return ChatGoogleGenerativeAI(**model_settings)
# Model shared by every chain invocation in this module.
chat_model = initialize_chat_model()

# Create Chat Template: fixed system instructions, then the stored
# conversation, then the current request.
_SYSTEM_INSTRUCTIONS = """ You are a language model designed to follow user instructions exactly as given.
Do not take any actions or provide any information unless specifically directed by the user.
Your role is to fulfill the user's requests precisely without deviating from the instructions provided."""

_prompt_messages = [
    SystemMessage(content=_SYSTEM_INSTRUCTIONS),
    MessagesPlaceholder(variable_name="chat_history"),
    HumanMessagePromptTemplate.from_template("{human_input}"),
]
chat_prompt_template = ChatPromptTemplate.from_messages(_prompt_messages)

# Initialize the Memory (messages are stored under "chat_history")
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Create an Output Parser
output_parser = StrOutputParser()

# Define a chain: attach the stored history to the incoming input, render the
# prompt, call the model, then parse the model output.
_attach_history = RunnablePassthrough.assign(
    chat_history=RunnableLambda(
        lambda human_input: memory.load_memory_variables(human_input)['chat_history']
    )
)
chain = _attach_history | chat_prompt_template | chat_model | output_parser
# Streamlit App
#
# NOTE(review): the original indentation of this script was lost.  The nesting
# used here (ATS score inside the keyword step, quiz inside the ATS step) is
# reconstructed from which names each step needs and from the session flags
# that keep earlier steps open -- confirm it matches the intended flow.

_QUIZ_OPTION_LABELS = ['A', 'B', 'C', 'D']
# One flag per step.  st.button() is only True on the rerun triggered by its
# own click, so each step records that it was opened and stays visible while
# the widgets nested inside it cause further reruns.
_STEP_FLAGS = ("keywords_extracted", "ats_score_calculated", "mcq_started", "quiz_submitted")


def _read_uploaded_document(uploaded_file, label):
    """Return the text of an uploaded PDF/DOCX, or None (after showing an error) for other types."""
    # Compare case-insensitively so "CV.PDF" is handled like "cv.pdf".
    filename = uploaded_file.name.lower()
    if filename.endswith('.pdf'):
        return extract_text_from_pdf(uploaded_file)
    if filename.endswith('.docx'):
        return extract_text_from_word(uploaded_file)
    st.error(f"Unsupported file type for {label}")
    return None


def _show_ats_score(job_description_keywords, resume_keywords):
    """Display the ATS score, querying the LLM only when the keyword sets change."""
    st.markdown("### ATS Score Calculation")
    cache_key = (
        tuple(sorted(job_description_keywords)),
        tuple(sorted(resume_keywords)),
    )
    # Every widget interaction reruns the script; reuse the stored response
    # instead of calling the model again on each rerun.
    if "ats_cache_key" not in st.session_state or st.session_state["ats_cache_key"] != cache_key:
        # NOTE: prompt lines are flush-left on purpose; indentation would
        # become part of the text sent to the model.
        query = {"human_input": f"""
You are an advanced Applicant Tracking System (ATS) designed to evaluate resumes against job descriptions with exceptional accuracy. Analyze the following keywords extracted from a job description and a resume, compare them, and calculate the match percentage.
Job Description Keywords:
{list(job_description_keywords)}
Resume Keywords:
{list(resume_keywords)}
Provide the ATS score as a percentage match between the resume and the job description in the following format:
The ATS Score of your Resume According to the Job Description is \"XX%\".
"""}
        response = chain.invoke(query)
        memory.save_context(query, {"output": response})
        st.session_state["ats_cache_key"] = cache_key
        st.session_state["ats_response"] = response
    st.write(st.session_state["ats_response"])


def _show_quiz(job_description_keywords):
    """Render the MCQ quiz: one question per rerun with Previous/Next/Submit/Restart."""
    st.title("Multiple Choice Quiz")
    # Initialize session state variables if they don't exist
    if 'current_question' not in st.session_state:
        st.session_state.current_question = 0
    if 'answers' not in st.session_state:
        # Keyed by question index so revisiting a question overwrites its
        # answer instead of adding a duplicate entry.
        st.session_state.answers = {}

    if not (st.button("MCQ Test") or st.session_state.mcq_started):
        return
    st.session_state.mcq_started = True

    if 'questions' not in st.session_state:
        mcq_list = generate_mcqs(job_description_keywords)
        st.session_state.questions = parse_mcq_questions(mcq_list)
    questions = st.session_state.questions

    if not questions:
        # Nothing usable came back: forget the empty result so the next click
        # on "MCQ Test" asks the model again.
        del st.session_state['questions']
        st.session_state.mcq_started = False
        st.warning("No questions could be read from the model response. Please try again.")
        return

    index = st.session_state.current_question
    last_index = len(questions) - 1

    # Display current question number and total questions
    st.write(f"Question {index + 1} of {len(questions)}")
    current_q = questions[index]
    st.write(current_q['question'])

    # Pre-select the stored answer when coming back to a question.
    saved_answer = st.session_state.answers.get(index)
    default_position = (
        _QUIZ_OPTION_LABELS.index(saved_answer)
        if saved_answer in _QUIZ_OPTION_LABELS
        else 0
    )
    answer = st.radio(
        "Select your answer:",
        options=_QUIZ_OPTION_LABELS,
        index=default_position,
        format_func=lambda x: f"{x}) {current_q['options'].get(x, ' ')}",
        key=f"question_{index}"  # Unique key per question
    )

    # Navigation buttons in columns
    col1, col2 = st.columns(2)
    if index > 0:
        with col1:
            if st.button("Previous"):
                st.session_state.current_question = index - 1
                st.rerun()
    with col2:
        if index < last_index:
            if st.button("Next"):
                st.session_state.answers[index] = answer
                st.session_state.current_question = index + 1
                st.rerun()
        elif st.button("Submit"):
            st.session_state.answers[index] = answer
            st.session_state.quiz_submitted = True

    # Rendered from a flag rather than inside the Submit branch: a button
    # nested under another button's branch is gone on the rerun its own click
    # triggers, so "Restart Quiz" could never fire there.
    if st.session_state.quiz_submitted:
        st.write("Quiz completed! Your answers:")
        st.write([
            f"{number + 1}-{choice}"
            for number, choice in sorted(st.session_state.answers.items())
        ])
        if st.button("Restart Quiz"):
            st.session_state.current_question = 0
            st.session_state.answers = {}
            st.session_state.quiz_submitted = False
            st.rerun()


st.title("Interview Preparation with AI")
st.markdown("## Part-1: Upload Files, Summarize, and Extract Keywords")

# File upload section
file1 = st.file_uploader("Upload your resume (PDF or DOCX):", type=["pdf", "docx"])
file2 = st.file_uploader("Upload the job description (PDF or DOCX):", type=["pdf", "docx"])

if file1 and file2:
    try:
        # Detect file type and extract text for both files
        text1 = _read_uploaded_document(file1, "file 1")
        text2 = _read_uploaded_document(file2, "file 2")
        # Without text for both documents the later steps cannot run; the
        # error has already been shown by the reader.
        documents_ready = text1 is not None and text2 is not None

        # Summarize texts
        #st.markdown("### Summarizing the uploaded documents...")
        #summary1 = summarize_text(text1)
        #summary2 = summarize_text(text2)
        #st.markdown("### Results for File 1 (Resume)")
        #st.subheader("Summary:")
        #st.write(summary1)
        #st.markdown("### Results for File 2 (Job Description)")
        #st.subheader("Summary:")
        #st.write(summary2)

        # Ensure session state variables are initialized
        for flag in _STEP_FLAGS:
            if flag not in st.session_state:
                st.session_state[flag] = False

        # Button to Extract Keywords
        if documents_ready and (st.button("Extract Keywords") or st.session_state.keywords_extracted):
            st.session_state.keywords_extracted = True

            # Extract keywords
            st.markdown("### Extracting keywords...")
            keywords1 = extract_keywords(text1)
            keywords2 = extract_keywords(text2)

            # Display Keywords
            st.markdown("### Results for File 1 (Resume)")
            st.subheader("Keywords:")
            st.write(", ".join(keywords1))
            st.markdown("### Results for File 2 (Job Description)")
            st.subheader("Keywords:")
            st.write(", ".join(keywords2))

            resume_keywords = set(keywords1)
            job_description_keywords = set(keywords2)

            # Button to Calculate ATS Score
            if st.button("ATS Score") or st.session_state.ats_score_calculated:
                st.session_state.ats_score_calculated = True
                _show_ats_score(job_description_keywords, resume_keywords)
                _show_quiz(job_description_keywords)
    except Exception as e:
        st.error(f"An error occurred: {e}")
else:
    st.info("Please upload both files to proceed.")