Spaces:
Sleeping
Sleeping
saifeddinemk
committed on
Commit
•
62d4e66
1
Parent(s):
e91e514
Init app
Browse files
app.py
CHANGED
@@ -3,19 +3,23 @@ from pydantic import BaseModel
|
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
from peft import PeftModel, PeftConfig
|
5 |
import torch
|
6 |
-
|
|
|
|
|
7 |
app = FastAPI()
|
8 |
|
9 |
-
# Load model and tokenizer
|
10 |
base_model_name = "akjindal53244/Llama-3.1-Storm-8B"
|
11 |
peft_model_id = "LlamaFactoryAI/cv-job-description-matching"
|
12 |
|
13 |
-
base_model = AutoModelForCausalLM.from_pretrained(
|
14 |
-
|
|
|
|
|
|
|
|
|
15 |
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
|
20 |
# Define request model
|
21 |
class AnalysisRequest(BaseModel):
|
@@ -27,31 +31,19 @@ async def analyze(request: AnalysisRequest):
|
|
27 |
try:
|
28 |
# Prepare input text with formatted message
|
29 |
system_prompt = """
|
30 |
-
You are an advanced AI model designed to analyze the compatibility between a CV and a job description
|
31 |
-
|
32 |
-
1. matching_analysis: Analyze the CV against the job description to identify key strengths and gaps.
|
33 |
-
2. description: Summarize the relevance of the CV to the job description in a few concise sentences.
|
34 |
-
3. score: Provide a numerical compatibility score (0-100) based on qualifications, skills, and experience.
|
35 |
-
4. recommendation: Suggest actions for the candidate to improve their match or readiness for the role.
|
36 |
-
|
37 |
-
Your output must be in JSON format as follows:
|
38 |
-
{
|
39 |
-
"matching_analysis": "Your detailed analysis here.",
|
40 |
-
"description": "A brief summary here.",
|
41 |
-
"score": 85,
|
42 |
-
"recommendation": "Your suggestions here."
|
43 |
-
}
|
44 |
"""
|
45 |
user_input = f"<CV> {request.cv} </CV>\n<job_description> {request.job_description} </job_description>"
|
46 |
input_text = system_prompt + user_input
|
47 |
|
48 |
-
# Tokenize and generate response
|
49 |
-
inputs = tokenizer(input_text, return_tensors="pt")
|
50 |
-
|
|
|
|
|
51 |
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
52 |
|
53 |
return {"analysis": generated_text}
|
54 |
|
55 |
except Exception as e:
|
56 |
raise HTTPException(status_code=500, detail=str(e))
|
57 |
-
|
|
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
from peft import PeftModel, PeftConfig
|
5 |
import torch
|
6 |
+
import bitsandbytes as bnb
|
7 |
+
|
8 |
+
# Initialize FastAPI app
|
9 |
app = FastAPI()
|
10 |
|
11 |
+
# Load model and tokenizer in optimized mode at startup
|
12 |
base_model_name = "akjindal53244/Llama-3.1-Storm-8B"
|
13 |
peft_model_id = "LlamaFactoryAI/cv-job-description-matching"
|
14 |
|
15 |
+
base_model = AutoModelForCausalLM.from_pretrained(
|
16 |
+
base_model_name,
|
17 |
+
torch_dtype=torch.float16, # Use FP16 precision
|
18 |
+
load_in_8bit=True, # Enable Int8 quantization
|
19 |
+
device_map="auto" # Auto-manage device allocation
|
20 |
+
)
|
21 |
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
|
22 |
+
model = PeftModel.from_pretrained(base_model, peft_model_id, torch_dtype=torch.float16)
|
|
|
|
|
23 |
|
24 |
# Define request model
|
25 |
class AnalysisRequest(BaseModel):
|
|
|
31 |
try:
|
32 |
# Prepare input text with formatted message
|
33 |
system_prompt = """
|
34 |
+
You are an advanced AI model designed to analyze the compatibility between a CV and a job description...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
"""
|
36 |
user_input = f"<CV> {request.cv} </CV>\n<job_description> {request.job_description} </job_description>"
|
37 |
input_text = system_prompt + user_input
|
38 |
|
39 |
+
# Tokenize and generate response with memory optimizations
|
40 |
+
inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
|
41 |
+
|
42 |
+
with torch.no_grad(): # Disable gradients for inference
|
43 |
+
outputs = model.generate(**inputs, max_new_tokens=64) # Limit generated tokens
|
44 |
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
45 |
|
46 |
return {"analysis": generated_text}
|
47 |
|
48 |
except Exception as e:
|
49 |
raise HTTPException(status_code=500, detail=str(e))
|
|