import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import NearestNeighbors

# Load the dataset (replace the path below with your own CSV if needed)
file_path = 'Job_Roles_and_Skills.csv'
dataset = pd.read_csv(file_path, encoding='latin1')  # try 'ISO-8859-1' or 'cp1252' if this encoding fails

# print(dataset.head())  # uncomment to inspect the first few rows

# Preprocessing: convert the comma-separated "Skills Required" column into
# lists of normalised (stripped, lower-cased) skill names.
# Replace the column name if it differs in your dataset.
def preprocess_skills(dataset):
    # Only split entries that are strings; leave NaN/other values untouched
    dataset["Skills Required"] = dataset["Skills Required"].apply(
        lambda x: [skill.strip().lower() for skill in x.split(",")] if isinstance(x, str) else x
    )
    return dataset


dataset = preprocess_skills(dataset)

# Train a simple NearestNeighbors model to match job roles (optional, for extensibility)
# The skills are already tokenised into lists, so pass them straight through to the vectorizer
vectorizer = CountVectorizer(analyzer=lambda skills: skills)
skill_matrix = vectorizer.fit_transform(dataset["Skills Required"])

nn_model = NearestNeighbors(n_neighbors=1, metric="cosine")
nn_model.fit(skill_matrix)
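
# Minimal sketch of how the NearestNeighbors model could be used for the
# "extensibility" mentioned above: suggest the closest listed job role for a
# free-form skill set. The helper below is an illustrative addition, not part
# of the core app flow.
def suggest_closest_role(current_skills):
    skills = [skill.strip().lower() for skill in current_skills.split(",")]
    query_vector = vectorizer.transform([skills])
    _, indices = nn_model.kneighbors(query_vector)
    return dataset.iloc[indices[0][0]]["Job Role"]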

# Define function to find missing skills
def find_missing_skills(job_role, current_skills):
    current_skills = [skill.strip().lower() for skill in current_skills.split(",")]
    
    # Match the job role
    job_row = dataset[dataset["Job Role"].str.lower() == job_role.lower()]
    if job_row.empty:
        return f"Job Role '{job_role}' not found in the dataset. Please try another option."

    required_skills = job_row.iloc[0]["Skills Required"]
    missing_skills = [skill for skill in required_skills if skill not in current_skills]

    return missing_skills if missing_skills else "No missing skills! You are fully qualified."
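
# Example (illustrative; the output depends on the roles and skills in your CSV):
#   find_missing_skills("Data Analyst", "Python, Excel")
#   -> the role's required skills that are not in the given list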

# Define the Gradio interface
def career_gap_analysis(job_role, current_skills):
    missing_skills = find_missing_skills(job_role, current_skills)
    if isinstance(missing_skills, list):
        return f"Missing Skills for '{job_role}': {', '.join(missing_skills)}"
    return missing_skills

# Extract unique job roles for dropdown options
job_roles = dataset["Job Role"].unique().tolist()

# Gradio App
demo = gr.Interface(
    fn=career_gap_analysis,
    inputs=[
        gr.Dropdown(label="Job Role", choices=job_roles),  # Remove placeholder
        gr.Textbox(label="Current Skills", placeholder="Enter your current skills separated by commas (e.g., Python, SQL)"),
    ],
    outputs="text",
    title="Career Gap Analysis",
    description="Identify missing skills for a specific job role based on your current skill set."
)

# Launch the app
demo.launch()
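
# Optional: demo.launch(share=True) asks Gradio to create a temporary public
# link if you want to share the app beyond localhost.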