saifeddinemk committed
Commit: f5d2489
1 Parent(s): b0338f5

Init Commit

Files changed (2)
  1. app.py +43 -49
  2. requirements.txt +3 -3
app.py CHANGED
@@ -1,72 +1,66 @@
- from transformers import AutoTokenizer
- from optimum.intel import INCModelForSeq2SeqLM
- from optimum.intel.openvino import OVModelForCausalLM
  import gradio as gr
- import json

- # Load OpenVINO GPT-J model for causal language modeling
- causal_model_id = "OpenVINO/gpt-j-6b-int4-ov"
- tokenizer = AutoTokenizer.from_pretrained(causal_model_id)
- causal_model = OVModelForCausalLM.from_pretrained(causal_model_id)

- # Load the Intel quantized summarization model
- summarizer_model_id = "Intel/distilbart-cnn-12-6-int8-dynamic"
- tokenizer_summarizer = AutoTokenizer.from_pretrained(summarizer_model_id)
- int8_model = INCModelForSeq2SeqLM.from_pretrained(summarizer_model_id)

- def summarize_text(text, max_length=100):
-     inputs = tokenizer_summarizer(text, return_tensors="pt", max_length=512, truncation=True)
-     summary_ids = int8_model.generate(inputs.input_ids, max_length=max_length, min_length=25, do_sample=False)
-     summary = tokenizer_summarizer.decode(summary_ids[0], skip_special_tokens=True)
-     return summary

- def match_cv_to_jobs(cv_text, job_descriptions_text):
      debug_info = "Debug Info:\n"
      results = []

-     # Summarize the CV text
-     summarized_cv = summarize_text(cv_text, max_length=400)
      debug_info += f"Summarized CV Text: {summarized_cv}\n"

-     # Summarize all job descriptions at once
-     summarized_descriptions = summarize_text(job_descriptions_text, max_length=400)
-     debug_info += f"Summarized Job Descriptions: {summarized_descriptions}\n"

-     # Create a prompt to compare the summarized CV with the summarized job descriptions
-     prompt = (
-         f"Compare the following job descriptions with this resume. Job Descriptions: {summarized_descriptions}. "
-         f"Resume: {summarized_cv}. Provide a match score and a brief analysis."
-     )
-     debug_info += f"\nGenerated Prompt: {prompt}\n"

-     # Generate response from the causal model
-     inputs = tokenizer(prompt, return_tensors="pt")
-     try:
-         outputs = causal_model.generate(**inputs, max_length=1024)
-         response_content = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
-         debug_info += f"Model Response: {response_content}\n"

-         try:
-             response_data = json.loads(response_content)
-             results.append(response_data)
-         except json.JSONDecodeError:
-             results.append({
-                 "Job Descriptions": job_descriptions_text,
-                 "Analysis": response_content
-             })
-     except Exception as e:
-         debug_info += f"Error: {str(e)}\n"
-         results.append({"Job Descriptions": job_descriptions_text, "Error": str(e)})

      return results, debug_info

  # Gradio interface
  with gr.Blocks() as demo:
-     gr.Markdown("# CV and Job Description Matcher with Summarization and Debugging")

      # Input fields for CV and job descriptions
      cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
-     job_descriptions_text = gr.Textbox(label="Job Descriptions", placeholder="Enter the job descriptions text here", lines=10)

      # Button and output area
      match_button = gr.Button("Match CV to Job Descriptions")
@@ -74,6 +68,6 @@ with gr.Blocks() as demo:
      debug_output = gr.Textbox(label="Debug Info", lines=10)  # Add a debug box to display debug info

      # Set button click to run the function
-     match_button.click(fn=match_cv_to_jobs, inputs=[cv_text, job_descriptions_text], outputs=[output, debug_output])

      demo.launch()
 
+ from sentence_transformers import SentenceTransformer, util
+ from transformers import pipeline
  import gradio as gr
+ import nltk

+ # Load the SentenceTransformer model for sentence similarity
+ model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

+ # Load a summarization pipeline (you can use 'facebook/bart-large-cnn' or any other summarization model)
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

+ # Download NLTK punkt tokenizer if not already installed (you may need to run this once)
+ nltk.download('punkt')

+ def summarize_text(text, max_length=100, min_length=25):
+     # Summarize the input text
+     summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
+     return summary[0]["summary_text"]
+
+ def match_cv_to_jobs(cv_text, job_descriptions):
      debug_info = "Debug Info:\n"
      results = []

+     # Summarize the CV text and the job descriptions
+     summarized_cv = summarize_text(cv_text, max_length=150)
      debug_info += f"Summarized CV Text: {summarized_cv}\n"

+     summarized_job_desc = summarize_text(job_descriptions, max_length=150)
+     debug_info += f"Summarized Job Description Text: {summarized_job_desc}\n"
+
+     # Encode the summarized CV text
+     cv_embedding = model.encode(summarized_cv, convert_to_tensor=True)
+     debug_info += f"CV Embedding: {cv_embedding}\n"

+     # Split summarized job description into sentences
+     description_sentences = nltk.tokenize.sent_tokenize(summarized_job_desc)

+     for sentence in description_sentences:
+         # Encode each sentence from the summarized job description
+         sentence_embedding = model.encode(sentence, convert_to_tensor=True)
+         debug_info += f"\nJob Description Sentence Embedding: {sentence_embedding}\n"

+         # Compute similarity score
+         similarity_score = util.pytorch_cos_sim(cv_embedding, sentence_embedding).item()
+         debug_info += f"Similarity Score for sentence: {similarity_score}\n"
+
+         results.append({
+             "Job Description Sentence": sentence,
+             "Similarity Score": similarity_score
+         })
+
+     # Sort results by similarity score in descending order
+     results = sorted(results, key=lambda x: x["Similarity Score"], reverse=True)

      return results, debug_info

  # Gradio interface
  with gr.Blocks() as demo:
+     gr.Markdown("# CV and Job Description Matcher with Summarization and Sentence Similarity")

      # Input fields for CV and job descriptions
      cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
+     job_descriptions = gr.Textbox(label="Job Descriptions", placeholder="Enter the entire job description text here", lines=10)

      # Button and output area
      match_button = gr.Button("Match CV to Job Descriptions")

      debug_output = gr.Textbox(label="Debug Info", lines=10)  # Add a debug box to display debug info

      # Set button click to run the function
+     match_button.click(fn=match_cv_to_jobs, inputs=[cv_text, job_descriptions], outputs=[output, debug_output])

      demo.launch()
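
For reference, here is a minimal standalone sketch of the sentence-similarity step the new app.py relies on. The model name matches the app; the CV summary and job-description sentences below are made-up examples, and in the app both texts would first pass through the BART summarizer.

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Illustrative inputs only; in the app these come from the summarized CV and job description.
cv_summary = "Python developer with NLP, transformers, and model deployment experience."
job_sentences = [
    "We are looking for a machine learning engineer with NLP experience.",
    "The candidate will manage a small accounting team.",
]

# Embed the CV once, then score each job-description sentence against it.
cv_embedding = model.encode(cv_summary, convert_to_tensor=True)
scored = [
    (s, util.pytorch_cos_sim(cv_embedding, model.encode(s, convert_to_tensor=True)).item())
    for s in job_sentences
]

# Highest cosine similarity first, mirroring the sort in match_cv_to_jobs().
for sentence, score in sorted(scored, key=lambda x: x[1], reverse=True):
    print(f"{score:.3f}  {sentence}")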
requirements.txt CHANGED
@@ -3,6 +3,6 @@ uvicorn
  transformers
  torch
  pydantic
- optimum[openvino]
- neural-compressor
- accelerate

  transformers
  torch
  pydantic
+ accelerate
+ sentence-transformers
+ nltk
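
After installing the updated requirements, a small one-time setup sketch (assuming internet access for the model and tokenizer downloads) can pre-fetch everything app.py loads at startup:

# Confirm the new dependencies import and pre-download the artifacts app.py needs.
import nltk
from sentence_transformers import SentenceTransformer
from transformers import pipeline

nltk.download("punkt")  # tokenizer data used by nltk.tokenize.sent_tokenize()
SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")  # caches the embedding model
pipeline("summarization", model="facebook/bart-large-cnn")  # caches the summarizer
print("Environment ready.")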