from transformers import AutoTokenizer
from optimum.intel import INCModelForSeq2SeqLM
from optimum.intel.openvino import OVModelForCausalLM
import gradio as gr
import json
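
# Assumed dependencies (not pinned in this file): transformers, gradio, and
# optimum with its OpenVINO and Neural Compressor extras, e.g.
#   pip install "optimum[openvino,neural-compressor]" transformers gradio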
# Load OpenVINO GPT-J model for causal language modeling
causal_model_id = "OpenVINO/gpt-j-6b-int4-ov"
tokenizer = AutoTokenizer.from_pretrained(causal_model_id)
causal_model = OVModelForCausalLM.from_pretrained(causal_model_id)
# Load the Intel quantized summarization model
summarizer_model_id = "Intel/distilbart-cnn-12-6-int8-dynamic"
tokenizer_summarizer = AutoTokenizer.from_pretrained(summarizer_model_id)
int8_model = INCModelForSeq2SeqLM.from_pretrained(summarizer_model_id)
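
# Note: both checkpoints are downloaded from the Hugging Face Hub on first run.
# OVModelForCausalLM executes through the OpenVINO runtime (CPU by default), and
# INCModelForSeq2SeqLM wraps an Intel Neural Compressor int8 seq2seq checkpoint.
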
def summarize_text(text, max_length=100):
    # Truncate the input to the encoder's 512-token limit, then generate a
    # deterministic (greedy) abstractive summary of 25..max_length tokens.
    inputs = tokenizer_summarizer(text, return_tensors="pt", max_length=512, truncation=True)
    summary_ids = int8_model.generate(inputs.input_ids, max_length=max_length, min_length=25, do_sample=False)
    summary = tokenizer_summarizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary
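
# Illustrative usage (hypothetical input, not called by the app directly):
#   summarize_text("Senior data engineer with seven years of ETL experience ...", max_length=120)
# would return a single abstractive summary string of roughly 25-120 tokens.
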
def match_cv_to_jobs(cv_text, job_descriptions_text):
    debug_info = "Debug Info:\n"
    results = []

    # Summarize the CV text
    summarized_cv = summarize_text(cv_text, max_length=400)
    debug_info += f"Summarized CV Text: {summarized_cv}\n"

    # Summarize all job descriptions at once
    summarized_descriptions = summarize_text(job_descriptions_text, max_length=400)
    debug_info += f"Summarized Job Descriptions: {summarized_descriptions}\n"

    # Build a prompt comparing the summarized CV against the summarized job descriptions
    prompt = (
        f"Compare the following job descriptions with this resume. Job Descriptions: {summarized_descriptions}. "
        f"Resume: {summarized_cv}. Provide a match score and a brief analysis."
    )
    debug_info += f"\nGenerated Prompt: {prompt}\n"

    # Generate a response from the causal model
    inputs = tokenizer(prompt, return_tensors="pt")
    try:
        # Cap only the newly generated tokens: after two 400-token summaries the
        # prompt itself can approach 1,000 tokens, so a total-length cap could
        # leave no room for the answer.
        outputs = causal_model.generate(**inputs, max_new_tokens=512)
        # Decode only the continuation so the echoed prompt is not mistaken
        # for the model's answer (decoder-only models return prompt + output).
        new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        response_content = tokenizer.decode(new_tokens, skip_special_tokens=True)
        debug_info += f"Model Response: {response_content}\n"

        # Surface valid JSON directly; otherwise fall back to the raw analysis text.
        try:
            response_data = json.loads(response_content)
            results.append(response_data)
        except json.JSONDecodeError:
            results.append({
                "Job Descriptions": job_descriptions_text,
                "Analysis": response_content
            })
    except Exception as e:
        debug_info += f"Error: {str(e)}\n"
        results.append({"Job Descriptions": job_descriptions_text, "Error": str(e)})

    return results, debug_info
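
# match_cv_to_jobs returns (results, debug_info): a one-element list holding
# either parsed JSON or the raw analysis text, plus the accumulated debug log.
# The Gradio wiring below maps these onto the JSON panel and the debug textbox.
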
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# CV and Job Description Matcher with Summarization and Debugging")

    # Input fields for the CV and the job descriptions
    cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    job_descriptions_text = gr.Textbox(label="Job Descriptions", placeholder="Enter the job descriptions text here", lines=10)

    # Button and output areas
    match_button = gr.Button("Match CV to Job Descriptions")
    output = gr.JSON(label="Match Results")
    debug_output = gr.Textbox(label="Debug Info", lines=10)  # Displays the accumulated debug log

    # Run the matcher when the button is clicked
    match_button.click(fn=match_cv_to_jobs, inputs=[cv_text, job_descriptions_text], outputs=[output, debug_output])
demo.launch()
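
# For local testing (assumed invocation, not needed on Spaces),
# demo.launch(share=True) would also expose a temporary public URL.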