from transformers import AutoTokenizer
from optimum.intel import INCModelForSeq2SeqLM
from optimum.intel.openvino import OVModelForCausalLM
import gradio as gr
import json
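
# Assumed dependencies (not pinned in this file): transformers, gradio, and
# optimum with its OpenVINO and Neural Compressor extras, e.g.
#   pip install "optimum[openvino,neural-compressor]" transformers gradio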
# Load OpenVINO GPT-J model for causal language modeling
causal_model_id = "OpenVINO/gpt-j-6b-int4-ov"
tokenizer = AutoTokenizer.from_pretrained(causal_model_id)
causal_model = OVModelForCausalLM.from_pretrained(causal_model_id)
# Load the Intel quantized summarization model
summarizer_model_id = "Intel/distilbart-cnn-12-6-int8-dynamic"
tokenizer_summarizer = AutoTokenizer.from_pretrained(summarizer_model_id)
int8_model = INCModelForSeq2SeqLM.from_pretrained(summarizer_model_id)
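
# Note: both checkpoints are downloaded from the Hugging Face Hub on first run.
# OVModelForCausalLM executes through the OpenVINO runtime (CPU by default), and
# INCModelForSeq2SeqLM wraps an Intel Neural Compressor int8 seq2seq checkpoint.
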
def summarize_text(text, max_length=100):
    # Truncate the input to the encoder's 512-token limit, then generate a
    # deterministic (greedy) abstractive summary of 25..max_length tokens.
    inputs = tokenizer_summarizer(text, return_tensors="pt", max_length=512, truncation=True)
    summary_ids = int8_model.generate(inputs.input_ids, max_length=max_length, min_length=25, do_sample=False)
    summary = tokenizer_summarizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary
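
# Illustrative usage (hypothetical input, not called by the app directly):
#   summarize_text("Senior data engineer with seven years of ETL experience ...", max_length=120)
# would return a single abstractive summary string of roughly 25-120 tokens.
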
def match_cv_to_jobs(cv_text, job_descriptions_text):
    debug_info = "Debug Info:\n"
    results = []

    # Summarize the CV text
    summarized_cv = summarize_text(cv_text, max_length=400)
    debug_info += f"Summarized CV Text: {summarized_cv}\n"

    # Summarize all job descriptions at once
    summarized_descriptions = summarize_text(job_descriptions_text, max_length=400)
    debug_info += f"Summarized Job Descriptions: {summarized_descriptions}\n"

    # Build a prompt comparing the summarized CV against the summarized job descriptions
    prompt = (
        f"Compare the following job descriptions with this resume. Job Descriptions: {summarized_descriptions}. "
        f"Resume: {summarized_cv}. Provide a match score and a brief analysis."
    )
    debug_info += f"\nGenerated Prompt: {prompt}\n"

    # Generate a response from the causal model
    inputs = tokenizer(prompt, return_tensors="pt")
    try:
        # Cap only the newly generated tokens: after two 400-token summaries the
        # prompt itself can approach 1,000 tokens, so a total-length cap could
        # leave no room for the answer.
        outputs = causal_model.generate(**inputs, max_new_tokens=512)
        # Decode only the continuation so the echoed prompt is not mistaken
        # for the model's answer (decoder-only models return prompt + output).
        new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        response_content = tokenizer.decode(new_tokens, skip_special_tokens=True)
        debug_info += f"Model Response: {response_content}\n"

        # Surface valid JSON directly; otherwise fall back to the raw analysis text.
        try:
            response_data = json.loads(response_content)
            results.append(response_data)
        except json.JSONDecodeError:
            results.append({
                "Job Descriptions": job_descriptions_text,
                "Analysis": response_content
            })
    except Exception as e:
        debug_info += f"Error: {str(e)}\n"
        results.append({"Job Descriptions": job_descriptions_text, "Error": str(e)})

    return results, debug_info
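
# match_cv_to_jobs returns (results, debug_info): a one-element list holding
# either parsed JSON or the raw analysis text, plus the accumulated debug log.
# The Gradio wiring below maps these onto the JSON panel and the debug textbox.
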
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# CV and Job Description Matcher with Summarization and Debugging")

    # Input fields for the CV and the job descriptions
    cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    job_descriptions_text = gr.Textbox(label="Job Descriptions", placeholder="Enter the job descriptions text here", lines=10)

    # Button and output areas
    match_button = gr.Button("Match CV to Job Descriptions")
    output = gr.JSON(label="Match Results")
    debug_output = gr.Textbox(label="Debug Info", lines=10)  # Displays the accumulated debug log

    # Run the matcher when the button is clicked
    match_button.click(fn=match_cv_to_jobs, inputs=[cv_text, job_descriptions_text], outputs=[output, debug_output])
demo.launch()
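
# For local testing (assumed invocation, not needed on Spaces),
# demo.launch(share=True) would also expose a temporary public URL.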