Spaces:
Sleeping
Sleeping
saifeddinemk
committed on
Commit
•
f5d2489
1
Parent(s):
b0338f5
Init Commit
Browse files- app.py +43 -49
- requirements.txt +3 -3
app.py
CHANGED
@@ -1,72 +1,66 @@
|
|
1 |
-
from
|
2 |
-
from
|
3 |
-
from optimum.intel.openvino import OVModelForCausalLM
|
4 |
import gradio as gr
|
5 |
-
import
|
6 |
|
7 |
-
# Load
|
8 |
-
|
9 |
-
tokenizer = AutoTokenizer.from_pretrained(causal_model_id)
|
10 |
-
causal_model = OVModelForCausalLM.from_pretrained(causal_model_id)
|
11 |
|
12 |
-
# Load
|
13 |
-
|
14 |
-
tokenizer_summarizer = AutoTokenizer.from_pretrained(summarizer_model_id)
|
15 |
-
int8_model = INCModelForSeq2SeqLM.from_pretrained(summarizer_model_id)
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
summary_ids = int8_model.generate(inputs.input_ids, max_length=max_length, min_length=25, do_sample=False)
|
20 |
-
summary = tokenizer_summarizer.decode(summary_ids[0], skip_special_tokens=True)
|
21 |
-
return summary
|
22 |
|
23 |
-
def
|
|
|
|
|
|
|
|
|
|
|
24 |
debug_info = "Debug Info:\n"
|
25 |
results = []
|
26 |
|
27 |
-
# Summarize the CV text
|
28 |
-
summarized_cv = summarize_text(cv_text, max_length=
|
29 |
debug_info += f"Summarized CV Text: {summarized_cv}\n"
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
34 |
|
35 |
-
#
|
36 |
-
|
37 |
-
f"Compare the following job descriptions with this resume. Job Descriptions: {summarized_descriptions}. "
|
38 |
-
f"Resume: {summarized_cv}. Provide a match score and a brief analysis."
|
39 |
-
)
|
40 |
-
debug_info += f"\nGenerated Prompt: {prompt}\n"
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
response_content = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
47 |
-
debug_info += f"Model Response: {response_content}\n"
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
|
61 |
return results, debug_info
|
62 |
|
63 |
# Gradio interface
|
64 |
with gr.Blocks() as demo:
|
65 |
-
gr.Markdown("# CV and Job Description Matcher with Summarization and
|
66 |
|
67 |
# Input fields for CV and job descriptions
|
68 |
cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
|
69 |
-
|
70 |
|
71 |
# Button and output area
|
72 |
match_button = gr.Button("Match CV to Job Descriptions")
|
@@ -74,6 +68,6 @@ with gr.Blocks() as demo:
|
|
74 |
debug_output = gr.Textbox(label="Debug Info", lines=10) # Add a debug box to display debug info
|
75 |
|
76 |
# Set button click to run the function
|
77 |
-
match_button.click(fn=match_cv_to_jobs, inputs=[cv_text,
|
78 |
|
79 |
demo.launch()
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer, util
|
2 |
+
from transformers import pipeline
|
|
|
3 |
import gradio as gr
|
4 |
+
import nltk
|
5 |
|
6 |
+
# Sentence-embedding model used to score similarity between the CV and
# each job-description sentence.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Summarization pipeline ('facebook/bart-large-cnn' can be swapped for any
# other summarization checkpoint).
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Sentence-tokenizer data for nltk.tokenize.sent_tokenize. Older NLTK
# releases read the 'punkt' resource while newer ones read 'punkt_tab';
# requesting both keeps tokenization working whichever NLTK gets installed.
# nltk.download() skips resources that are already up to date.
nltk.download('punkt')
nltk.download('punkt_tab')
|
|
|
|
|
|
|
14 |
|
15 |
+
def summarize_text(text, max_length=100, min_length=25):
    """Return a summary of *text* produced by the module-level summarizer.

    Args:
        text: The text to summarize.
        max_length: Upper bound on the generated summary length, forwarded
            to the summarization pipeline.
        min_length: Lower bound on the generated summary length, forwarded
            to the summarization pipeline.

    Returns:
        The summary string, or an empty string when *text* is empty or
        contains only whitespace.
    """
    # Nothing to summarize: skip the model call rather than feeding it
    # an empty prompt.
    if not text or not text.strip():
        return ""

    # truncation=True clips inputs longer than the model's input window
    # instead of letting the pipeline fail on long CVs / job postings.
    summary = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        truncation=True,
    )
    return summary[0]["summary_text"]
|
19 |
+
|
20 |
+
def match_cv_to_jobs(cv_text, job_descriptions):
    """Score each sentence of a summarized job description against a CV.

    Both inputs are summarized with ``summarize_text``. The summarized CV is
    embedded once, the summarized job description is split into sentences,
    and each sentence is embedded and compared to the CV embedding with
    cosine similarity.

    Args:
        cv_text: Raw CV text.
        job_descriptions: Raw job-description text.

    Returns:
        A ``(results, debug_info)`` tuple. ``results`` is a list of dicts
        with the keys ``"Job Description Sentence"`` and
        ``"Similarity Score"``, sorted by score in descending order.
        ``debug_info`` is a text trace of the intermediate values. When
        either input is empty or whitespace-only, ``results`` is ``[]`` and
        ``debug_info`` explains why no matching was done.
    """
    # Collect debug fragments and join once at the end instead of growing
    # a string with += inside the loop.
    debug_parts = ["Debug Info:\n"]

    # Guard against empty inputs so the models are never run on empty text.
    if not (cv_text or "").strip() or not (job_descriptions or "").strip():
        debug_parts.append(
            "Skipped matching: CV text and job description text must both be non-empty.\n"
        )
        return [], "".join(debug_parts)

    # Summarize the CV text and the job descriptions
    summarized_cv = summarize_text(cv_text, max_length=150)
    debug_parts.append(f"Summarized CV Text: {summarized_cv}\n")

    summarized_job_desc = summarize_text(job_descriptions, max_length=150)
    debug_parts.append(f"Summarized Job Description Text: {summarized_job_desc}\n")

    # Encode the summarized CV text once; it is reused for every sentence.
    cv_embedding = model.encode(summarized_cv, convert_to_tensor=True)
    debug_parts.append(f"CV Embedding: {cv_embedding}\n")

    results = []
    # Split summarized job description into sentences and score each one.
    for sentence in nltk.tokenize.sent_tokenize(summarized_job_desc):
        sentence_embedding = model.encode(sentence, convert_to_tensor=True)
        debug_parts.append(
            f"\nJob Description Sentence Embedding: {sentence_embedding}\n"
        )

        # Cosine similarity between the CV and this sentence, as a float.
        similarity_score = util.pytorch_cos_sim(cv_embedding, sentence_embedding).item()
        debug_parts.append(f"Similarity Score for sentence: {similarity_score}\n")

        results.append({
            "Job Description Sentence": sentence,
            "Similarity Score": similarity_score,
        })

    # Best-matching sentences first.
    results.sort(key=lambda entry: entry["Similarity Score"], reverse=True)

    return results, "".join(debug_parts)
|
56 |
|
57 |
# Gradio interface
|
58 |
with gr.Blocks() as demo:
|
59 |
+
gr.Markdown("# CV and Job Description Matcher with Summarization and Sentence Similarity")
|
60 |
|
61 |
# Input fields for CV and job descriptions
|
62 |
cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
|
63 |
+
job_descriptions = gr.Textbox(label="Job Descriptions", placeholder="Enter the entire job description text here", lines=10)
|
64 |
|
65 |
# Button and output area
|
66 |
match_button = gr.Button("Match CV to Job Descriptions")
|
|
|
68 |
debug_output = gr.Textbox(label="Debug Info", lines=10) # Add a debug box to display debug info
|
69 |
|
70 |
# Set button click to run the function
|
71 |
+
match_button.click(fn=match_cv_to_jobs, inputs=[cv_text, job_descriptions], outputs=[output, debug_output])
|
72 |
|
73 |
demo.launch()
|
requirements.txt
CHANGED
@@ -3,6 +3,6 @@ uvicorn
|
|
3 |
transformers
|
4 |
torch
|
5 |
pydantic
|
6 |
-
|
7 |
-
|
8 |
-
|
|
|
3 |
transformers
|
4 |
torch
|
5 |
pydantic
|
6 |
+
accelerate
|
7 |
+
sentence-transformers
|
8 |
+
nltk
|