# Spaces: Sleeping  (appears to be page-status text captured with the source; not part of the program)
import functools
import subprocess
import sys

import PyPDF2
import pytextrank  # imported for its side effect: registers the "textrank" spaCy pipe
import spacy
import streamlit as st
from docx import Document
from langchain.memory import ConversationBufferMemory
from langchain_core.messages import SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_google_genai import ChatGoogleGenerativeAI
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
# Function to check and download spaCy model
def ensure_spacy_model(model_name="en_core_web_sm"):
    """Make sure the spaCy model *model_name* is loadable, downloading it if needed.

    Args:
        model_name: Name of the spaCy model package to check.

    Raises:
        subprocess.CalledProcessError: If the download command exits non-zero.
        OSError: If the model still cannot be loaded after the download.
    """
    try:
        spacy.load(model_name)
    except OSError:
        # Run the download with the interpreter executing this app rather than
        # whatever "python" resolves to on PATH, so the model lands in the same
        # environment that will try to load it.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", model_name],
            check=True,  # surface a failed download instead of ignoring it
        )
        # Confirm the freshly downloaded model really loads.
        spacy.load(model_name)
# Function to extract text from PDF
def extract_text_from_pdf(uploaded_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        uploaded_file: Whatever ``PyPDF2.PdfReader`` accepts; in this app it
            is the object returned by the Streamlit file uploader.

    Returns:
        A single string with the extracted text of all pages.
    """
    reader = PyPDF2.PdfReader(uploaded_file)
    # Guard against a falsy (e.g. None) extraction result for pages without
    # a text layer, which would otherwise break string concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to extract text from Word document
def extract_text_from_word(uploaded_file):
    """Return the text of a Word document, one line per paragraph.

    Args:
        uploaded_file: Passed straight to ``docx.Document``.

    Returns:
        Every paragraph's text, each followed by a newline, joined in
        document order.
    """
    document = Document(uploaded_file)
    return "".join(f"{para.text}\n" for para in document.paragraphs)
@functools.lru_cache(maxsize=1)
def _load_summarizer(model_name="facebook/bart-large-cnn"):
    """Build the summarization pipeline once and hand back the same object afterwards."""
    return pipeline("summarization", model=model_name)


# Function to summarize text
def summarize_text(text, max_length=1000, min_length=30):
    """Summarize *text* with the ``facebook/bart-large-cnn`` pipeline.

    Args:
        text: The text to summarize.
        max_length: Upper bound on summary length; capped at 1000.
        min_length: Lower bound on summary length.

    Returns:
        The summary string. An empty string when *text* is empty or only
        whitespace. On any failure, a string of the form
        ``"Error in summarization: ..."`` (errors are reported, not raised).
    """
    # Nothing to summarize: skip loading and running the model entirely.
    if not text or not text.strip():
        return ""

    max_length = min(max_length, 1000)  # Ensure max_length doesn't exceed 1000
    try:
        # Reuse the cached pipeline instead of reloading the model per call.
        summarizer = _load_summarizer()
        summary = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            # Clip inputs longer than the model's limit instead of failing.
            truncation=True,
        )
        if isinstance(summary, list) and summary:
            return summary[0]["summary_text"]
        raise ValueError("Unexpected summarizer output format")
    except Exception as e:
        return f"Error in summarization: {e}"
@functools.lru_cache(maxsize=1)
def _load_textrank_nlp(model_name="en_core_web_sm"):
    """Load the spaCy model with a ``textrank`` pipe appended, once per process."""
    ensure_spacy_model(model_name)
    nlp = spacy.load(model_name)
    nlp.add_pipe("textrank", last=True)
    return nlp


# Function to extract keywords using spaCy and PyTextRank
def extract_keywords(text, top_n=10):
    """Return up to *top_n* key phrases from *text*, in PyTextRank's order.

    Args:
        text: The text to analyse.
        top_n: Maximum number of phrases to return.

    Returns:
        A list of phrase strings; empty when *text* is empty or whitespace.
    """
    # Empty input has no phrases; avoid loading the model just to find that out.
    if not text or not text.strip():
        return []

    doc = _load_textrank_nlp()(text)
    return [phrase.text for phrase in doc._.phrases[:top_n]]
# Initialize Google Generative AI chat model
def initialize_chat_model(key_path="key.txt"):
    """Create the Gemini chat model using an API key read from a file.

    Args:
        key_path: Path of the text file holding the Google API key.
            Defaults to ``"key.txt"`` in the working directory.

    Returns:
        A configured ``ChatGoogleGenerativeAI`` instance.

    Raises:
        OSError: If the key file cannot be opened (e.g. it does not exist).
        ValueError: If the key file is empty or contains only whitespace.
    """
    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark,
    # which str.strip() would otherwise leave glued to the key.
    with open(key_path, "r", encoding="utf-8-sig") as key_file:
        google_api_key = key_file.read().strip()

    # Fail here with a clear message rather than building a client that can
    # only fail later when the first request is sent.
    if not google_api_key:
        raise ValueError(f"No API key found in {key_path!r}")

    return ChatGoogleGenerativeAI(
        google_api_key=google_api_key,
        model="gemini-1.5-pro-latest",
        temperature=0.4,
        max_tokens=2000,
        timeout=120,
        max_retries=5,
        top_p=0.9,
        top_k=40,
        presence_penalty=0.6,
        frequency_penalty=0.3,
    )
chat_model = initialize_chat_model()

# Create Chat Template: fixed system instructions, then the stored history,
# then the current user input.
_system_message = SystemMessage(
    content=""" You are a language model designed to follow user instructions exactly as given.
Do not take any actions or provide any information unless specifically directed by the user.
Your role is to fulfill the user's requests precisely without deviating from the instructions provided."""
)
chat_prompt_template = ChatPromptTemplate.from_messages(
    [
        _system_message,
        MessagesPlaceholder(variable_name="chat_history"),
        HumanMessagePromptTemplate.from_template("{human_input}"),
    ]
)

# Initialize the Memory
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Create an Output Parser
output_parser = StrOutputParser()


def _load_chat_history(human_input):
    """Return the messages currently held in ``memory`` for the given chain input."""
    return memory.load_memory_variables(human_input)['chat_history']


# Define a chain: attach the history to the input, render the prompt,
# call the model, and reduce the reply to a plain string.
_with_history = RunnablePassthrough.assign(chat_history=RunnableLambda(_load_chat_history))
chain = _with_history | chat_prompt_template | chat_model | output_parser
def _read_uploaded_text(uploaded_file, label):
    """Extract text from an uploaded PDF/DOCX; show an error and return None otherwise.

    The extension check is case-insensitive so names such as ``CV.PDF`` work.
    """
    filename = uploaded_file.name.lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(uploaded_file)
    if filename.endswith(".docx"):
        return extract_text_from_word(uploaded_file)
    st.error(f"Unsupported file type for {label}")
    return None


def _render_analysis(resume_text, job_text):
    """Render summaries, keywords, the keyword comparison and the ATS score."""
    # Summarize texts
    st.markdown("### Summarizing the uploaded documents...")
    summary1 = summarize_text(resume_text)
    summary2 = summarize_text(job_text)

    # Extract keywords
    st.markdown("### Extracting keywords...")
    keywords1 = extract_keywords(resume_text)
    keywords2 = extract_keywords(job_text)

    # Display results
    st.markdown("### Results for File 1 (Resume)")
    st.subheader("Summary:")
    st.write(summary1)
    st.subheader("Keywords:")
    st.write(", ".join(keywords1))

    st.markdown("### Results for File 2 (Job Description)")
    st.subheader("Summary:")
    st.write(summary2)
    st.subheader("Keywords:")
    st.write(", ".join(keywords2))

    # Compare keywords. Phrases are lower-cased and trimmed first so that
    # "Python" and "python" count as the same keyword, and sorted so the
    # displayed lists and the prompt are identical from run to run.
    st.markdown("### Keyword Analysis")
    resume_keywords = sorted({kw.strip().lower() for kw in keywords1})
    job_description_keywords = sorted({kw.strip().lower() for kw in keywords2})
    st.write("**Resume Keywords:**", ", ".join(resume_keywords))
    st.write("**Job Description Keywords:**", ", ".join(job_description_keywords))
    common_keywords = sorted(set(resume_keywords) & set(job_description_keywords))
    st.write("**Common Keywords:**", ", ".join(common_keywords))

    # Calculate ATS Score
    st.markdown("### ATS Score Calculation")
    query = {"human_input": f"""
You are an advanced Applicant Tracking System (ATS) designed to evaluate resumes against job descriptions with exceptional accuracy. Analyze the following keywords extracted from a job description and a resume, compare them, and calculate the match percentage.
Job Description Keywords:
{job_description_keywords}
Resume Keywords:
{resume_keywords}
Provide the ATS score as a percentage match between the resume and the job description in the following format:
The ATS Score of your Resume According to the Job Description is \"XX%\"
"""}
    response = chain.invoke(query)
    # Record the exchange in the conversation memory.
    memory.save_context(query, {"output": response})
    st.write(response)


# Streamlit App
st.title("Interview Preparation with AI")
st.markdown("## Part-1: Upload Files, Summarize, and Extract Keywords")

# File upload section
file1 = st.file_uploader("Upload your resume (PDF or DOCX):", type=["pdf", "docx"])
file2 = st.file_uploader("Upload the job description (PDF or DOCX):", type=["pdf", "docx"])

if file1 and file2:
    try:
        text1 = _read_uploaded_text(file1, "file 1")
        text2 = _read_uploaded_text(file2, "file 2")
        # Continue only when both files were readable; an unsupported file has
        # already been reported and must not surface later as a NameError.
        if text1 is not None and text2 is not None:
            _render_analysis(text1, text2)
    except Exception as e:
        st.error(f"An error occurred: {e}")
else:
    st.info("Please upload both files to proceed.")