# cv_job / app.py

import torch
from fastapi import FastAPI, File, UploadFile, Form
from transformers import GPT2Tokenizer, GPT2Model
from sklearn.metrics.pairwise import cosine_similarity
from fastapi.middleware.cors import CORSMiddleware

# Initialize FastAPI app
app = FastAPI()

# Add CORS middleware to allow frontend access
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2Model.from_pretrained("gpt2")

# Helper function to get a GPT-2 embedding by mean-pooling the last hidden state
def get_gpt2_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():  # no gradients needed for inference
        outputs = model(**inputs)
    # Average the token embeddings into a single (1, hidden_size) vector
    embeddings = torch.mean(outputs.last_hidden_state, dim=1).numpy()
    return embeddings

# Helper function to calculate cosine similarity between two embeddings
def calculate_similarity(embedding1, embedding2):
    similarity = cosine_similarity(embedding1, embedding2)
    # Cast to a plain Python float so FastAPI can serialize it to JSON
    return float(similarity[0][0])

# Endpoint to upload a CV file and return its decoded text
@app.post("/upload-cv/")
async def upload_cv(file: UploadFile = File(...)):
    content = await file.read()
    cv_text = content.decode("utf-8")
    return {"cv_text": cv_text}

# Endpoint to compare newline-separated job descriptions with the CV text
@app.post("/compare/")
async def compare_job_cv(job_descriptions: str = Form(...), cv_text: str = Form(...)):
    # Generate the embedding for the CV text once
    cv_embedding = get_gpt2_embedding(cv_text)
    # Split job descriptions by line (skipping blank lines) and score each against the CV
    descriptions = [d.strip() for d in job_descriptions.strip().split("\n") if d.strip()]
    results = []
    for description in descriptions:
        job_embedding = get_gpt2_embedding(description)
        similarity_score = calculate_similarity(cv_embedding, job_embedding)
        results.append({
            "Job Description": description,
            "Match Score": round(similarity_score, 2)
        })
    return {"results": results}

# Run the app with `uvicorn app:app --reload`
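
# A minimal sketch for launching the app directly with `python app.py`
# (assumes uvicorn is installed and port 8000 is free):
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example requests, assuming the server is running on localhost:8000:
#   curl -F "file=@cv.txt" http://localhost:8000/upload-cv/
#   curl -F "cv_text=Experienced Python engineer" \
#        -F "job_descriptions=Senior Python developer" \
#        http://localhost:8000/compare/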