saifeddinemk committed
Commit: f5d2489
1 Parent(s): b0338f5

Init Commit

Files changed (2)
  1. app.py +43 -49
  2. requirements.txt +3 -3
app.py CHANGED
@@ -1,72 +1,66 @@
- from transformers import AutoTokenizer
- from optimum.intel import INCModelForSeq2SeqLM
- from optimum.intel.openvino import OVModelForCausalLM
  import gradio as gr
- import json

- # Load OpenVINO GPT-J model for causal language modeling
- causal_model_id = "OpenVINO/gpt-j-6b-int4-ov"
- tokenizer = AutoTokenizer.from_pretrained(causal_model_id)
- causal_model = OVModelForCausalLM.from_pretrained(causal_model_id)

- # Load the Intel quantized summarization model
- summarizer_model_id = "Intel/distilbart-cnn-12-6-int8-dynamic"
- tokenizer_summarizer = AutoTokenizer.from_pretrained(summarizer_model_id)
- int8_model = INCModelForSeq2SeqLM.from_pretrained(summarizer_model_id)

- def summarize_text(text, max_length=100):
-     inputs = tokenizer_summarizer(text, return_tensors="pt", max_length=512, truncation=True)
-     summary_ids = int8_model.generate(inputs.input_ids, max_length=max_length, min_length=25, do_sample=False)
-     summary = tokenizer_summarizer.decode(summary_ids[0], skip_special_tokens=True)
-     return summary

- def match_cv_to_jobs(cv_text, job_descriptions_text):
      debug_info = "Debug Info:\n"
      results = []

-     # Summarize the CV text
-     summarized_cv = summarize_text(cv_text, max_length=400)
      debug_info += f"Summarized CV Text: {summarized_cv}\n"

-     # Summarize all job descriptions at once
-     summarized_descriptions = summarize_text(job_descriptions_text, max_length=400)
-     debug_info += f"Summarized Job Descriptions: {summarized_descriptions}\n"

-     # Create a prompt to compare the summarized CV with the summarized job descriptions
-     prompt = (
-         f"Compare the following job descriptions with this resume. Job Descriptions: {summarized_descriptions}. "
-         f"Resume: {summarized_cv}. Provide a match score and a brief analysis."
-     )
-     debug_info += f"\nGenerated Prompt: {prompt}\n"

-     # Generate response from the causal model
-     inputs = tokenizer(prompt, return_tensors="pt")
-     try:
-         outputs = causal_model.generate(**inputs, max_length=1024)
-         response_content = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
-         debug_info += f"Model Response: {response_content}\n"

-         try:
-             response_data = json.loads(response_content)
-             results.append(response_data)
-         except json.JSONDecodeError:
-             results.append({
-                 "Job Descriptions": job_descriptions_text,
-                 "Analysis": response_content
-             })
-     except Exception as e:
-         debug_info += f"Error: {str(e)}\n"
-         results.append({"Job Descriptions": job_descriptions_text, "Error": str(e)})

      return results, debug_info

  # Gradio interface
  with gr.Blocks() as demo:
-     gr.Markdown("# CV and Job Description Matcher with Summarization and Debugging")

      # Input fields for CV and job descriptions
      cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
-     job_descriptions_text = gr.Textbox(label="Job Descriptions", placeholder="Enter the job descriptions text here", lines=10)

      # Button and output area
      match_button = gr.Button("Match CV to Job Descriptions")
@@ -74,6 +68,6 @@ with gr.Blocks() as demo:
      debug_output = gr.Textbox(label="Debug Info", lines=10)  # Add a debug box to display debug info

      # Set button click to run the function
-     match_button.click(fn=match_cv_to_jobs, inputs=[cv_text, job_descriptions_text], outputs=[output, debug_output])

      demo.launch()
 
+ from sentence_transformers import SentenceTransformer, util
+ from transformers import pipeline
  import gradio as gr
+ import nltk

+ # Load the SentenceTransformer model for sentence similarity
+ model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

+ # Load a summarization pipeline (you can use 'facebook/bart-large-cnn' or any other summarization model)
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

+ # Download NLTK punkt tokenizer if not already installed (you may need to run this once)
+ nltk.download('punkt')

+ def summarize_text(text, max_length=100, min_length=25):
+     # Summarize the input text
+     summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
+     return summary[0]["summary_text"]
+
+ def match_cv_to_jobs(cv_text, job_descriptions):
      debug_info = "Debug Info:\n"
      results = []

+     # Summarize the CV text and the job descriptions
+     summarized_cv = summarize_text(cv_text, max_length=150)
      debug_info += f"Summarized CV Text: {summarized_cv}\n"

+     summarized_job_desc = summarize_text(job_descriptions, max_length=150)
+     debug_info += f"Summarized Job Description Text: {summarized_job_desc}\n"
+
+     # Encode the summarized CV text
+     cv_embedding = model.encode(summarized_cv, convert_to_tensor=True)
+     debug_info += f"CV Embedding: {cv_embedding}\n"

+     # Split summarized job description into sentences
+     description_sentences = nltk.tokenize.sent_tokenize(summarized_job_desc)

+     for sentence in description_sentences:
+         # Encode each sentence from the summarized job description
+         sentence_embedding = model.encode(sentence, convert_to_tensor=True)
+         debug_info += f"\nJob Description Sentence Embedding: {sentence_embedding}\n"

+         # Compute similarity score
+         similarity_score = util.pytorch_cos_sim(cv_embedding, sentence_embedding).item()
+         debug_info += f"Similarity Score for sentence: {similarity_score}\n"
+
+         results.append({
+             "Job Description Sentence": sentence,
+             "Similarity Score": similarity_score
+         })
+
+     # Sort results by similarity score in descending order
+     results = sorted(results, key=lambda x: x["Similarity Score"], reverse=True)

      return results, debug_info

  # Gradio interface
  with gr.Blocks() as demo:
+     gr.Markdown("# CV and Job Description Matcher with Summarization and Sentence Similarity")

      # Input fields for CV and job descriptions
      cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
+     job_descriptions = gr.Textbox(label="Job Descriptions", placeholder="Enter the entire job description text here", lines=10)

      # Button and output area
      match_button = gr.Button("Match CV to Job Descriptions")

      debug_output = gr.Textbox(label="Debug Info", lines=10)  # Add a debug box to display debug info

      # Set button click to run the function
+     match_button.click(fn=match_cv_to_jobs, inputs=[cv_text, job_descriptions], outputs=[output, debug_output])

      demo.launch()
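
For reference, here is a minimal standalone sketch of the sentence-similarity step the new app.py relies on. The model name matches the app; the CV summary and job-description sentences below are made-up examples, and in the app both texts would first pass through the BART summarizer.

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Illustrative inputs only; in the app these come from the summarized CV and job description.
cv_summary = "Python developer with NLP, transformers, and model deployment experience."
job_sentences = [
    "We are looking for a machine learning engineer with NLP experience.",
    "The candidate will manage a small accounting team.",
]

# Embed the CV once, then score each job-description sentence against it.
cv_embedding = model.encode(cv_summary, convert_to_tensor=True)
scored = [
    (s, util.pytorch_cos_sim(cv_embedding, model.encode(s, convert_to_tensor=True)).item())
    for s in job_sentences
]

# Highest cosine similarity first, mirroring the sort in match_cv_to_jobs().
for sentence, score in sorted(scored, key=lambda x: x[1], reverse=True):
    print(f"{score:.3f}  {sentence}")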
requirements.txt CHANGED
@@ -3,6 +3,6 @@ uvicorn
  transformers
  torch
  pydantic
- optimum[openvino]
- neural-compressor
- accelerate

  transformers
  torch
  pydantic
+ accelerate
+ sentence-transformers
+ nltk
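
After installing the updated requirements, a small one-time setup sketch (assuming internet access for the model and tokenizer downloads) can pre-fetch everything app.py loads at startup:

# Confirm the new dependencies import and pre-download the artifacts app.py needs.
import nltk
from sentence_transformers import SentenceTransformer
from transformers import pipeline

nltk.download("punkt")  # tokenizer data used by nltk.tokenize.sent_tokenize()
SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")  # caches the embedding model
pipeline("summarization", model="facebook/bart-large-cnn")  # caches the summarizer
print("Environment ready.")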