# Spaces status: Sleeping
import json

import gradio as gr
from optimum.intel import INCModelForSeq2SeqLM
from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer
# Both models are loaded once, at import time, and are read as module-level
# globals by the functions below.

# Load OpenVINO GPT-J model for causal language modeling
causal_model_id = "OpenVINO/gpt-j-6b-int4-ov"
tokenizer = AutoTokenizer.from_pretrained(causal_model_id)
causal_model = OVModelForCausalLM.from_pretrained(causal_model_id)

# Load the Intel quantized summarization model
summarizer_model_id = "Intel/distilbart-cnn-12-6-int8-dynamic"
tokenizer_summarizer = AutoTokenizer.from_pretrained(summarizer_model_id)
int8_model = INCModelForSeq2SeqLM.from_pretrained(summarizer_model_id)
def summarize_text(text, max_length=100):
    """Return a summary of *text* produced by the seq2seq summarizer.

    Args:
        text: Text to summarize. The tokenized input is truncated to 512
            tokens before generation.
        max_length: Value forwarded to ``generate`` as the maximum length
            of the generated summary.

    Returns:
        The decoded summary with special tokens removed, or ``""`` when
        *text* is ``None``, empty, or whitespace-only.
    """
    # Nothing to summarize: skip the model call. With ``min_length=25`` the
    # model would otherwise be forced to emit tokens from no content.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""

    inputs = tokenizer_summarizer(
        text, return_tensors="pt", max_length=512, truncation=True
    )
    # Forward the tokenizer's attention mask explicitly instead of leaving
    # ``generate`` to infer one from the input ids.
    summary_ids = int8_model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_length=max_length,
        min_length=25,
        do_sample=False,
    )
    return tokenizer_summarizer.decode(summary_ids[0], skip_special_tokens=True)
def match_cv_to_jobs(cv_text, job_descriptions_text):
    """Summarize a CV and job descriptions, then ask the causal LM to compare them.

    Args:
        cv_text: Raw CV / resume text.
        job_descriptions_text: Raw text of the job descriptions; it is
            summarized as a single document.

    Returns:
        A ``(results, debug_info)`` tuple. ``results`` is a one-element
        list containing one of:

        * the model reply parsed as JSON, when the reply is valid JSON;
        * ``{"Job Descriptions": ..., "Analysis": ...}`` when it is not;
        * ``{"Job Descriptions": ..., "Error": ...}`` when an input is
          empty or generation fails.

        ``debug_info`` is a multi-line string tracing the summaries, the
        prompt, and the model reply or error.
    """
    debug_info = "Debug Info:\n"

    # Reject empty inputs up front: summarizing nothing and comparing the
    # result would only produce a meaningless analysis.
    if not (cv_text or "").strip() or not (job_descriptions_text or "").strip():
        message = "CV text and job descriptions must both be non-empty."
        debug_info += f"Error: {message}\n"
        return [{"Job Descriptions": job_descriptions_text, "Error": message}], debug_info

    # Summarize the CV text
    summarized_cv = summarize_text(cv_text, max_length=400)
    debug_info += f"Summarized CV Text: {summarized_cv}\n"

    # Summarize all job descriptions at once
    summarized_descriptions = summarize_text(job_descriptions_text, max_length=400)
    debug_info += f"Summarized Job Descriptions: {summarized_descriptions}\n"

    # Create a prompt to compare the summarized CV with the summarized job descriptions
    prompt = (
        f"Compare the following job descriptions with this resume. Job Descriptions: {summarized_descriptions}. "
        f"Resume: {summarized_cv}. Provide a match score and a brief analysis."
    )
    debug_info += f"\nGenerated Prompt: {prompt}\n"

    # Generate response from the causal model
    inputs = tokenizer(prompt, return_tensors="pt")
    try:
        # NOTE: ``max_length`` counts the prompt tokens as well as the
        # newly generated ones.
        outputs = causal_model.generate(**inputs, max_length=1024)
        # A decoder-only model returns the prompt followed by the
        # continuation. Drop the prompt tokens so the reply can parse as
        # JSON and the analysis does not repeat the prompt.
        prompt_length = inputs["input_ids"].shape[1]
        response_content = tokenizer.batch_decode(
            outputs[:, prompt_length:], skip_special_tokens=True
        )[0].strip()
    except Exception as e:
        # UI boundary: report the failure in the results instead of raising.
        debug_info += f"Error: {str(e)}\n"
        return [{"Job Descriptions": job_descriptions_text, "Error": str(e)}], debug_info

    debug_info += f"Model Response: {response_content}\n"

    try:
        result = json.loads(response_content)
    except json.JSONDecodeError:
        # Free-text reply: return it verbatim as the analysis.
        result = {
            "Job Descriptions": job_descriptions_text,
            "Analysis": response_content,
        }
    return [result], debug_info
# Gradio interface: two text inputs, one button, and two outputs (the match
# results as JSON plus the debug trace returned by match_cv_to_jobs).
with gr.Blocks() as demo:
    gr.Markdown("# CV and Job Description Matcher with Summarization and Debugging")

    # Input fields for CV and job descriptions
    cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    job_descriptions_text = gr.Textbox(
        label="Job Descriptions",
        placeholder="Enter the job descriptions text here",
        lines=10,
    )

    # Button and output area
    match_button = gr.Button("Match CV to Job Descriptions")
    output = gr.JSON(label="Match Results")
    debug_output = gr.Textbox(label="Debug Info", lines=10)  # Debug box to display debug info

    # The order of ``outputs`` matches the (results, debug_info) tuple
    # returned by match_cv_to_jobs.
    match_button.click(
        fn=match_cv_to_jobs,
        inputs=[cv_text, job_descriptions_text],
        outputs=[output, debug_output],
    )

demo.launch()