Spaces:
Sleeping
Sleeping
saifeddinemk
committed on
Commit
•
9f26a6c
1
Parent(s):
08ba70e
Init app
Browse files
- app.py +57 -41
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,49 +1,65 @@
|
|
1 |
-
from fastapi import FastAPI, HTTPException
|
2 |
-
from pydantic import BaseModel
|
3 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
-
from peft import PeftModel, PeftConfig
|
5 |
import torch
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
# Initialize FastAPI app
|
8 |
app = FastAPI()
|
9 |
|
10 |
-
#
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
base_model_name,
|
18 |
-
torch_dtype=torch.float16 if device == "cuda" else torch.float16,
|
19 |
-
device_map="auto" if device == "cuda" else None
|
20 |
)
|
21 |
-
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
|
22 |
-
model = PeftModel.from_pretrained(base_model, peft_model_id, torch_dtype=torch.float16 if device == "cuda" else torch.float16)
|
23 |
-
|
24 |
-
# Define request model
|
25 |
-
class AnalysisRequest(BaseModel):
|
26 |
-
cv: str
|
27 |
-
job_description: str
|
28 |
-
|
29 |
-
@app.post("/analyze")
|
30 |
-
async def analyze(request: AnalysisRequest):
|
31 |
-
try:
|
32 |
-
# Prepare input text with formatted message
|
33 |
-
system_prompt = """
|
34 |
-
You are an advanced AI model designed to analyze the compatibility between a CV and a job description...
|
35 |
-
"""
|
36 |
-
user_input = f"<CV> {request.cv} </CV>\n<job_description> {request.job_description} </job_description>"
|
37 |
-
input_text = system_prompt + user_input
|
38 |
-
|
39 |
-
# Tokenize and generate response with memory optimizations
|
40 |
-
inputs = tokenizer(input_text, return_tensors="pt").to(device)
|
41 |
-
|
42 |
-
with torch.no_grad(): # Disable gradients for inference
|
43 |
-
outputs = model.generate(**inputs, max_new_tokens=32) # Limit generated tokens
|
44 |
-
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
45 |
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
-
|
49 |
-
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
1 |
import torch
|
2 |
+
from fastapi import FastAPI, File, UploadFile, Form
|
3 |
+
from transformers import GPT2Tokenizer, GPT2Model
|
4 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
5 |
+
from typing import List
|
6 |
+
from fastapi.middleware.cors import CORSMiddleware
|
7 |
|
8 |
# Initialize FastAPI app
|
9 |
app = FastAPI()
|
10 |
|
11 |
+
# Add CORS middleware to allow frontend access
|
12 |
+
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    # A wildcard origin must not be combined with credentialed requests:
    # the FastAPI CORS docs require explicit origins/methods/headers when
    # allow_credentials is True. The endpoints visible here use no cookies
    # or auth headers, so credentials are disabled rather than narrowing
    # the origin list.
    # NOTE(review): if a frontend does need cookies, list its origin(s)
    # explicitly and turn this back on.
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
+
# Load GPT-2 model and tokenizer
|
21 |
+
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
22 |
+
model = GPT2Model.from_pretrained("gpt2")
|
23 |
+
|
24 |
+
# Helper function to get GPT-2 embeddings
|
25 |
+
def get_gpt2_embedding(text: str):
    """Return a mean-pooled GPT-2 embedding for ``text``.

    The text is tokenized with the module-level ``tokenizer`` (truncated to
    at most 512 tokens), passed through the module-level ``model``, and the
    last hidden states are averaged over dimension 1.

    Args:
        text: Raw text to embed.

    Returns:
        The pooled hidden states converted to a NumPy array, in the 2-D form
        that ``cosine_similarity`` is later called with.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # Inference only: disable autograd so no computation graph is built and
    # kept alive for every request.
    with torch.no_grad():
        outputs = model(**inputs)
        pooled = torch.mean(outputs.last_hidden_state, dim=1)
    return pooled.numpy()
|
30 |
+
|
31 |
+
# Helper function to calculate cosine similarity
|
32 |
+
def calculate_similarity(embedding1, embedding2):
    """Return the cosine similarity of two embeddings as a builtin float.

    Args:
        embedding1: 2-D array-like accepted by ``cosine_similarity``.
        embedding2: 2-D array-like accepted by ``cosine_similarity``.

    Returns:
        The similarity between the first row of each input.
    """
    similarity = cosine_similarity(embedding1, embedding2)
    # The matrix entry is a NumPy scalar; convert it so callers can put the
    # value straight into a JSON response without a serialization error.
    return float(similarity[0][0])
|
35 |
+
|
36 |
+
# Endpoint to upload CV file and store CV text
|
37 |
+
@app.post("/upload-cv/")
async def upload_cv(file: UploadFile = File(...)):
    """Read an uploaded CV file and return its contents as text.

    Args:
        file: The uploaded file; its bytes must be UTF-8 encoded text.

    Returns:
        A dict with the decoded text under the ``"cv_text"`` key.

    Raises:
        HTTPException: 400 if the upload cannot be decoded as UTF-8
            (for example a binary PDF or DOCX file).
    """
    # Imported locally because HTTPException is not among the module-level
    # fastapi imports visible in this file.
    from fastapi import HTTPException

    content = await file.read()
    try:
        cv_text = content.decode("utf-8")
    except UnicodeDecodeError as exc:
        # Report a client error instead of letting the decode failure
        # surface as an opaque 500.
        raise HTTPException(
            status_code=400,
            detail="CV file must be UTF-8 encoded text.",
        ) from exc
    return {"cv_text": cv_text}
|
42 |
+
|
43 |
+
# Endpoint to compare job descriptions with the CV text
|
44 |
+
@app.post("/compare/")
async def compare_job_cv(job_descriptions: str = Form(...), cv_text: str = Form(...)):
    """Score each job description against the CV text.

    Args:
        job_descriptions: One job description per line.
        cv_text: The CV text to compare against.

    Returns:
        A dict whose ``"results"`` list holds, for every non-blank line of
        ``job_descriptions``, the description and its cosine-similarity
        score against the CV rounded to two decimals.
    """
    # Embed the CV once; it is reused for every description.
    cv_embedding = get_gpt2_embedding(cv_text)

    results = []
    for raw_line in job_descriptions.split("\n"):
        # strip() drops the "\r" left behind by CRLF form data; blank lines
        # carry no description and are skipped instead of being embedded.
        description = raw_line.strip()
        if not description:
            continue

        job_embedding = get_gpt2_embedding(description)
        similarity_score = calculate_similarity(cv_embedding, job_embedding)

        results.append({
            "Job Description": description,
            # Coerce to a builtin float: a NumPy scalar would survive
            # round() and then fail JSON serialization of the response.
            "Match Score": round(float(similarity_score), 2),
        })

    return {"results": results}
|
64 |
|
65 |
+
# Run the app with `uvicorn <filename>:app --reload`
|
|
requirements.txt
CHANGED
@@ -3,3 +3,4 @@ uvicorn
|
|
3 |
torch
|
4 |
transformers
|
5 |
scikit-learn
|
|
|
|
3 |
torch
|
4 |
transformers
|
5 |
scikit-learn
|
6 |
+
peft
|