Spaces:

saifeddinemk
/

cv_job

Sleeping

File size: 3,483 Bytes

7fb1b05
 
f8af8bb
9208e17
e079d59
b397dc0
732403f
7fb1b05
 
 
725f549
7fb1b05
 
 
 
96ed827
 
7fb1b05
 
 
 
96ed827
732403f
f65dc03
9208e17
 
f8af8bb
96ed827
 
 
732403f
 
 
 
 
 
 
7fb1b05
732403f
 
 
7fb1b05
732403f
 
7fb1b05
732403f
 
96ed827
 
732403f
 
 
 
 
 
 
 
 
 
9208e17
f65dc03
9208e17
 
 
96ed827
9208e17
e079d59
9208e17
732403f
9208e17
e079d59
 
9208e17
f65dc03
9f26a6c
e079d59
732403f
91207a8
9208e17

from transformers import AutoTokenizer
from optimum.intel import INCModelForSeq2SeqLM
from optimum.intel.openvino import OVModelForCausalLM
import gradio as gr
import json

# Causal language model used to compare a CV against job descriptions.
# The tokenizer and model are loaded once at import time from the same model id
# and are shared by every request handled by match_cv_to_jobs.
causal_model_id = "OpenVINO/gpt-j-6b-int4-ov"
tokenizer = AutoTokenizer.from_pretrained(causal_model_id)
causal_model = OVModelForCausalLM.from_pretrained(causal_model_id)

# Sequence-to-sequence summarization model used by summarize_text.
# It has its own tokenizer, distinct from the causal model's tokenizer above.
summarizer_model_id = "Intel/distilbart-cnn-12-6-int8-dynamic"
tokenizer_summarizer = AutoTokenizer.from_pretrained(summarizer_model_id)
int8_model = INCModelForSeq2SeqLM.from_pretrained(summarizer_model_id)

def summarize_text(text, max_length=100):
    """Summarize *text* with the quantized summarization model.

    Args:
        text: Source text to condense. The tokenizer truncates it to 512
            tokens before generation.
        max_length: Upper bound, in tokens, on the generated summary.

    Returns:
        The decoded summary with special tokens removed, or an empty string
        when *text* is ``None``, empty, or whitespace-only.
    """
    # Nothing to summarize: avoid asking the model to invent a summary from
    # an input that carries no content.
    if not text or not text.strip():
        return ""

    inputs = tokenizer_summarizer(text, return_tensors="pt", max_length=512, truncation=True)
    # Keep the lower bound feasible when a caller asks for a very short summary.
    min_length = min(25, max_length)
    summary_ids = int8_model.generate(
        inputs.input_ids,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
    )
    return tokenizer_summarizer.decode(summary_ids[0], skip_special_tokens=True)

def match_cv_to_jobs(cv_text, job_descriptions_text):
    """Compare a CV with job descriptions using the causal language model.

    Both inputs are summarized, the summaries are embedded in a prompt, and
    the model's completion for that prompt is returned.

    Args:
        cv_text: Raw CV text.
        job_descriptions_text: Raw text containing the job descriptions.

    Returns:
        A ``(results, debug_info)`` tuple. ``results`` is a one-element list
        holding either the model reply parsed as JSON, a dict with
        ``"Job Descriptions"`` and ``"Analysis"`` keys when the reply is not
        valid JSON, or a dict with ``"Job Descriptions"`` and ``"Error"`` keys
        when an input is empty or any step fails. ``debug_info`` is a
        multi-line log of the intermediate values.
    """
    debug_lines = ["Debug Info:\n"]
    results = []

    # Reject empty inputs up front instead of running both models on nothing.
    if not (cv_text or "").strip() or not (job_descriptions_text or "").strip():
        message = "CV text and job descriptions must both be non-empty."
        debug_lines.append(f"Error: {message}\n")
        results.append({"Job Descriptions": job_descriptions_text, "Error": message})
        return results, "".join(debug_lines)

    # This function is the UI boundary: any failure in summarization or
    # generation is reported in the results and the debug log rather than raised.
    try:
        # Summarize the CV text
        summarized_cv = summarize_text(cv_text, max_length=400)
        debug_lines.append(f"Summarized CV Text: {summarized_cv}\n")

        # Summarize all job descriptions at once
        summarized_descriptions = summarize_text(job_descriptions_text, max_length=400)
        debug_lines.append(f"Summarized Job Descriptions: {summarized_descriptions}\n")

        # Create a prompt to compare the summarized CV with the summarized job descriptions
        prompt = (
            f"Compare the following job descriptions with this resume. Job Descriptions: {summarized_descriptions}. "
            f"Resume: {summarized_cv}. Provide a match score and a brief analysis."
        )
        debug_lines.append(f"\nGenerated Prompt: {prompt}\n")

        inputs = tokenizer(prompt, return_tensors="pt")
        prompt_length = inputs["input_ids"].shape[1]

        # max_new_tokens bounds only the completion. max_length would also
        # count the prompt tokens, and the prompt alone can exceed 200 tokens.
        outputs = causal_model.generate(**inputs, max_new_tokens=200)

        # The generated sequence starts with the prompt tokens; decode only
        # what the model appended so the reply is not a copy of the prompt.
        completion_ids = outputs[:, prompt_length:]
        response_content = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0].strip()
        debug_lines.append(f"Model Response: {response_content}\n")

        try:
            results.append(json.loads(response_content))
        except json.JSONDecodeError:
            # Free-text reply: return it verbatim next to the original input.
            results.append({
                "Job Descriptions": job_descriptions_text,
                "Analysis": response_content,
            })
    except Exception as e:
        debug_lines.append(f"Error: {str(e)}\n")
        results.append({"Job Descriptions": job_descriptions_text, "Error": str(e)})

    return results, "".join(debug_lines)

# Gradio interface: two text inputs, one button, and two outputs
# (the match results and the debug log returned by match_cv_to_jobs).
with gr.Blocks() as demo:
    gr.Markdown("# CV and Job Description Matcher with Summarization and Debugging")
    
    # Free-text inputs for the CV and for the job descriptions
    cv_text = gr.Textbox(label="CV Text", placeholder="Enter the CV text here", lines=10)
    job_descriptions_text = gr.Textbox(label="Job Descriptions", placeholder="Enter the job descriptions text here", lines=10)
    
    # Trigger button and the components that receive the handler's return values
    match_button = gr.Button("Match CV to Job Descriptions")
    output = gr.JSON(label="Match Results")
    debug_output = gr.Textbox(label="Debug Info", lines=10)  # Shows the debug_info string returned by the handler
    
    # On click, pass both textbox values to match_cv_to_jobs; its
    # (results, debug_info) return pair fills the JSON view and the debug box.
    match_button.click(fn=match_cv_to_jobs, inputs=[cv_text, job_descriptions_text], outputs=[output, debug_output])

# Runs at module level, so the app starts whenever this file is executed.
demo.launch()