"""FastAPI service that scores job descriptions against a CV using GPT-2 embeddings."""

import torch
from fastapi import FastAPI, File, UploadFile, Form
from transformers import GPT2Tokenizer, GPT2Model
from sklearn.metrics.pairwise import cosine_similarity
from typing import List
from fastapi.middleware.cors import CORSMiddleware

# Initialize FastAPI app
app = FastAPI()

# Add CORS middleware to allow frontend access.
# NOTE(review): browsers reject allow_origins=["*"] combined with
# allow_credentials=True per the CORS spec — pin concrete origins in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load GPT-2 model and tokenizer once at startup.
# eval() disables dropout so embeddings are deterministic across requests.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2Model.from_pretrained("gpt2")
model.eval()


def get_gpt2_embedding(text: str):
    """Return a (1, hidden_size) numpy embedding for *text*.

    The embedding is the mean over the sequence dimension of GPT-2's last
    hidden state; input is truncated to the model's 512-token context.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # Inference only: no_grad avoids building the autograd graph,
    # saving memory and time on every request.
    with torch.no_grad():
        outputs = model(**inputs)
    return torch.mean(outputs.last_hidden_state, dim=1).numpy()


def calculate_similarity(embedding1, embedding2) -> float:
    """Cosine similarity between two (1, d) embeddings as a plain Python float.

    float(...) converts the numpy float32 scalar so FastAPI can
    JSON-serialize the value without a custom encoder.
    """
    similarity = cosine_similarity(embedding1, embedding2)
    return float(similarity[0][0])


@app.post("/upload-cv/")
async def upload_cv(file: UploadFile = File(...)):
    """Accept an uploaded CV file and return its decoded text content."""
    content = await file.read()
    # errors="replace": don't 500 on non-UTF-8 uploads (e.g. Latin-1 CVs);
    # undecodable bytes become U+FFFD instead of raising UnicodeDecodeError.
    cv_text = content.decode("utf-8", errors="replace")
    return {"cv_text": cv_text}


@app.post("/compare/")
async def compare_job_cv(job_descriptions: str = Form(...), cv_text: str = Form(...)):
    """Score each newline-separated job description against the CV text.

    Returns {"results": [{"Job Description": ..., "Match Score": ...}, ...]}
    with scores rounded to two decimals.
    """
    # Generate embedding for the CV text once; reuse it for every description.
    cv_embedding = get_gpt2_embedding(cv_text)

    results = []
    for description in job_descriptions.strip().split("\n"):
        description = description.strip()
        if not description:
            # Skip blank lines — embedding an empty string yields a
            # meaningless score.
            continue
        job_embedding = get_gpt2_embedding(description)
        similarity_score = calculate_similarity(cv_embedding, job_embedding)
        results.append({
            "Job Description": description,
            "Match Score": round(similarity_score, 2),
        })
    return {"results": results}

# Run the app with `uvicorn main:app --reload` (adjust `main` to this file's module name)