saifeddinemk committed on
Commit
62d4e66
1 Parent(s): e91e514
Files changed (1)
  1. app.py +17 -25
app.py CHANGED
@@ -3,19 +3,23 @@ from pydantic import BaseModel
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel, PeftConfig
 import torch
-# Initialize the FastAPI app
+import bitsandbytes as bnb
+
+# Initialize FastAPI app
 app = FastAPI()
 
-# Load model and tokenizer once at startup
+# Load model and tokenizer in optimized mode at startup
 base_model_name = "akjindal53244/Llama-3.1-Storm-8B"
 peft_model_id = "LlamaFactoryAI/cv-job-description-matching"
 
-base_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float16)
-model = PeftModel.from_pretrained(base_model, peft_model_id, torch_dtype=torch.float16)
+base_model = AutoModelForCausalLM.from_pretrained(
+    base_model_name,
+    torch_dtype=torch.float16,  # Use FP16 precision
+    load_in_8bit=True,          # Enable Int8 quantization
+    device_map="auto"           # Auto-manage device allocation
+)
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-config = PeftConfig.from_pretrained(peft_model_id)
-
-
+model = PeftModel.from_pretrained(base_model, peft_model_id, torch_dtype=torch.float16)
 
 # Define request model
 class AnalysisRequest(BaseModel):
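
Note on the 8-bit flag: recent transformers releases deprecate passing load_in_8bit=True directly to from_pretrained in favor of a BitsAndBytesConfig passed via quantization_config. A minimal sketch of the equivalent load under that newer API (same model ID as above; not part of this commit):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)  # Int8 weights via bitsandbytes

base_model = AutoModelForCausalLM.from_pretrained(
    "akjindal53244/Llama-3.1-Storm-8B",
    torch_dtype=torch.float16,       # FP16 for the non-quantized modules
    quantization_config=bnb_config,  # replaces the bare load_in_8bit=True kwarg
    device_map="auto",               # let accelerate place weights across devices
)

Either way, the explicit `import bitsandbytes as bnb` added in this commit is unused in the code itself; transformers pulls in bitsandbytes on its own when 8-bit loading is requested.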
@@ -27,31 +31,19 @@ async def analyze(request: AnalysisRequest):
     try:
         # Prepare input text with formatted message
        system_prompt = """
-        You are an advanced AI model designed to analyze the compatibility between a CV and a job description. You will receive a CV and a job description. Your task is to output a structured JSON format that includes the following:
-
-        1. matching_analysis: Analyze the CV against the job description to identify key strengths and gaps.
-        2. description: Summarize the relevance of the CV to the job description in a few concise sentences.
-        3. score: Provide a numerical compatibility score (0-100) based on qualifications, skills, and experience.
-        4. recommendation: Suggest actions for the candidate to improve their match or readiness for the role.
-
-        Your output must be in JSON format as follows:
-        {
-            "matching_analysis": "Your detailed analysis here.",
-            "description": "A brief summary here.",
-            "score": 85,
-            "recommendation": "Your suggestions here."
-        }
+        You are an advanced AI model designed to analyze the compatibility between a CV and a job description...
        """
         user_input = f"<CV> {request.cv} </CV>\n<job_description> {request.job_description} </job_description>"
         input_text = system_prompt + user_input
 
-        # Tokenize and generate response
-        inputs = tokenizer(input_text, return_tensors="pt")
-        outputs = model.generate(**inputs, max_new_tokens=64)
+        # Tokenize and generate response with memory optimizations
+        inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
+
+        with torch.no_grad():  # Disable gradients for inference
+            outputs = model.generate(**inputs, max_new_tokens=64)  # Limit generated tokens
         generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
         return {"analysis": generated_text}
 
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
-
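
For reference, the hunk boundary above cuts off the body of AnalysisRequest. From the request.cv and request.job_description accesses in analyze, the model presumably looks like this (a sketch; the fields are inferred, not shown in the diff):

class AnalysisRequest(BaseModel):
    cv: str               # raw CV text
    job_description: str  # raw job description text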
 
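
Once the app is running, the endpoint can be exercised with a short client call. A sketch, assuming the handler is registered as POST /analyze on the default uvicorn port (the route decorator sits outside this diff, so both path and port are assumptions):

import requests

resp = requests.post(
    "http://localhost:8000/analyze",  # assumed route and host for the analyze handler
    json={"cv": "…CV text…", "job_description": "…job description text…"},
)
print(resp.json()["analysis"])  # the handler returns {"analysis": generated_text}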