File size: 2,131 Bytes
e91e514
9f26a6c
 
 
 
 
62d4e66
 
91207a8
 
9f26a6c
 
 
 
 
 
 
62d4e66
91207a8
9f26a6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91207a8
9f26a6c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import torch
from fastapi import FastAPI, File, UploadFile, Form
from transformers import GPT2Tokenizer, GPT2Model
from sklearn.metrics.pairwise import cosine_similarity
from typing import List
from fastapi.middleware.cors import CORSMiddleware

# Initialize FastAPI app
app = FastAPI()

# Add CORS middleware to allow frontend access.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is the
# most permissive possible policy — fine for local development, but should be
# narrowed to the real frontend origin before production. TODO confirm intent.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load GPT-2 model and tokenizer once at import time so every request reuses
# them. On the first run this downloads the pretrained weights from the
# Hugging Face hub; subsequent runs read from the local cache.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2Model.from_pretrained("gpt2")

# Helper function to get GPT-2 embeddings
def get_gpt2_embedding(text):
    """Return a (1, hidden_size) numpy embedding for *text*.

    Tokenizes the input (truncated to GPT-2's 512-token window), runs the
    model forward, and mean-pools the last hidden state over the sequence
    dimension.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # Inference only: disable autograd so the forward pass does not build a
    # computation graph (saves memory and time on every request).
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean-pool token embeddings into a single vector per input.
    embeddings = torch.mean(outputs.last_hidden_state, dim=1).numpy()
    return embeddings

# Helper function to calculate cosine similarity
def calculate_similarity(embedding1, embedding2):
    """Return the cosine similarity of two (1, d) embeddings as a float.

    Computed directly from dot product and norms instead of
    sklearn.metrics.pairwise.cosine_similarity — the previous version
    returned a numpy scalar, which FastAPI's JSON encoder cannot
    serialize; this returns a plain Python float.
    """
    v1, v2 = embedding1[0], embedding2[0]
    dot = float((v1 * v2).sum())
    norm1 = float((v1 * v1).sum()) ** 0.5
    norm2 = float((v2 * v2).sum()) ** 0.5
    # Match sklearn's behavior for degenerate input: a zero vector has
    # similarity 0 with everything (avoids division by zero).
    if norm1 == 0.0 or norm2 == 0.0:
        return 0.0
    return dot / (norm1 * norm2)

# Endpoint to upload CV file and return its text content
@app.post("/upload-cv/")
async def upload_cv(file: UploadFile = File(...)):
    """Read the uploaded file and return its decoded text as JSON.

    Returns: {"cv_text": <decoded file contents>}.
    """
    content = await file.read()
    # Uploads are not guaranteed to be UTF-8 (users frequently submit PDFs,
    # DOCX, or legacy encodings); a strict decode would raise
    # UnicodeDecodeError and surface to the client as a 500. Substitute
    # undecodable bytes so the endpoint always returns text.
    cv_text = content.decode("utf-8", errors="replace")
    return {"cv_text": cv_text}

# Endpoint to compare job descriptions with the CV text
@app.post("/compare/")
async def compare_job_cv(job_descriptions: str = Form(...), cv_text: str = Form(...)):
    """Score each line of *job_descriptions* against *cv_text*.

    job_descriptions: newline-separated job description texts (one per line).
    cv_text: the CV text to compare against.
    Returns: {"results": [{"Job Description": ..., "Match Score": ...}, ...]}.
    """
    # Embed the CV once; it is compared against every description.
    cv_embedding = get_gpt2_embedding(cv_text)

    results = []

    # splitlines() also strips '\r' from CRLF input (Windows clients), which
    # split("\n") would leave attached to every description.
    for description in job_descriptions.splitlines():
        description = description.strip()
        # Skip blank lines instead of embedding and scoring the empty string.
        if not description:
            continue

        job_embedding = get_gpt2_embedding(description)
        similarity_score = calculate_similarity(cv_embedding, job_embedding)

        results.append({
            "Job Description": description,
            # float() guards against numpy scalar types, which FastAPI's
            # JSON encoder rejects.
            "Match Score": round(float(similarity_score), 2),
        })

    return {"results": results}

# Run the app with `uvicorn <filename>:app --reload`