saifeddinemk committed on
Commit 9f26a6c
1 Parent(s): 08ba70e
Files changed (2)
  1. app.py +57 -41
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,49 +1,65 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel, PeftConfig
 import torch
 
 # Initialize FastAPI app
 app = FastAPI()
 
-# Load model and tokenizer in optimized mode at startup
-base_model_name = "akjindal53244/Llama-3.1-Storm-8B"
-peft_model_id = "LlamaFactoryAI/cv-job-description-matching"
-
-# Load the model with FP16 precision on CUDA if available
-device = "cuda" if torch.cuda.is_available() else "cpu"
-base_model = AutoModelForCausalLM.from_pretrained(
-    base_model_name,
-    torch_dtype=torch.float16 if device == "cuda" else torch.float16,
-    device_map="auto" if device == "cuda" else None
 )
-tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-model = PeftModel.from_pretrained(base_model, peft_model_id, torch_dtype=torch.float16 if device == "cuda" else torch.float16)
-
-# Define request model
-class AnalysisRequest(BaseModel):
-    cv: str
-    job_description: str
-
-@app.post("/analyze")
-async def analyze(request: AnalysisRequest):
-    try:
-        # Prepare input text with formatted message
-        system_prompt = """
-        You are an advanced AI model designed to analyze the compatibility between a CV and a job description...
-        """
-        user_input = f"<CV> {request.cv} </CV>\n<job_description> {request.job_description} </job_description>"
-        input_text = system_prompt + user_input
-
-        # Tokenize and generate response with memory optimizations
-        inputs = tokenizer(input_text, return_tensors="pt").to(device)
-
-        with torch.no_grad():  # Disable gradients for inference
-            outputs = model.generate(**inputs, max_new_tokens=32)  # Limit generated tokens
-            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-        return {"analysis": generated_text}
 
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
 import torch
+from fastapi import FastAPI, File, UploadFile, Form
+from transformers import GPT2Tokenizer, GPT2Model
+from sklearn.metrics.pairwise import cosine_similarity
+from typing import List
+from fastapi.middleware.cors import CORSMiddleware
 
 # Initialize FastAPI app
 app = FastAPI()
 
+# Add CORS middleware to allow frontend access
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
 
+# Load GPT-2 model and tokenizer
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+model = GPT2Model.from_pretrained("gpt2")
+
+# Helper function to get GPT-2 embeddings
+def get_gpt2_embedding(text):
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+    outputs = model(**inputs)
+    embeddings = torch.mean(outputs.last_hidden_state, dim=1).detach().numpy()
+    return embeddings
+
+# Helper function to calculate cosine similarity
+def calculate_similarity(embedding1, embedding2):
+    similarity = cosine_similarity(embedding1, embedding2)
+    return similarity[0][0]
+
+# Endpoint to upload CV file and store CV text
+@app.post("/upload-cv/")
+async def upload_cv(file: UploadFile = File(...)):
+    content = await file.read()
+    cv_text = content.decode("utf-8")
+    return {"cv_text": cv_text}
+
+# Endpoint to compare job descriptions with the CV text
+@app.post("/compare/")
+async def compare_job_cv(job_descriptions: str = Form(...), cv_text: str = Form(...)):
+    # Generate embedding for the CV text
+    cv_embedding = get_gpt2_embedding(cv_text)
+
+    # Split job descriptions by line and calculate scores
+    descriptions = job_descriptions.strip().split("\n")
+    results = []
+
+    for description in descriptions:
+        job_embedding = get_gpt2_embedding(description)
+        similarity_score = calculate_similarity(cv_embedding, job_embedding)
+
+        # Append results
+        results.append({
+            "Job Description": description,
+            "Match Score": round(similarity_score, 2)
+        })
+
+    return {"results": results}
+
+# Run the app with `uvicorn <filename>:app --reload`
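For reference, the two new endpoints can be exercised with a small client once the server is running. The sketch below is not part of the commit; it assumes the app is started with `uvicorn app:app --reload` on the default port 8000 and uses the `requests` package, with `cv.txt` and the job-description strings as placeholder inputs.

import requests

BASE_URL = "http://127.0.0.1:8000"  # assumed local uvicorn default

# Upload a plain-text CV; the endpoint echoes back the decoded text
with open("cv.txt", "rb") as f:  # cv.txt is a placeholder file name
    upload = requests.post(f"{BASE_URL}/upload-cv/", files={"file": f})
cv_text = upload.json()["cv_text"]

# Compare the CV against newline-separated job descriptions
jobs = "Senior Python backend engineer\nData analyst with SQL experience"
response = requests.post(
    f"{BASE_URL}/compare/",
    data={"job_descriptions": jobs, "cv_text": cv_text},
)
print(response.json())  # {"results": [{"Job Description": ..., "Match Score": ...}, ...]}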
 
requirements.txt CHANGED
@@ -3,3 +3,4 @@ uvicorn
 torch
 transformers
 scikit-learn
+peft