Sirawitch committed on
Commit bff2feb · verified · 1 Parent(s): e65e766

Update app.py

Files changed (1)
  1. app.py +25 -10
app.py CHANGED
@@ -3,8 +3,8 @@ from pydantic import BaseModel
 from typing import Optional
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
-from transformers import BitsAndBytesConfig  # add this import
 import logging
+import os
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -16,18 +16,33 @@ try:
     model_name = "scb10x/llama-3-typhoon-v1.5-8b-instruct"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-    # Use BitsAndBytes for quantization
-    config = AutoConfig.from_pretrained(model_name)
-    config.quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+    # Check whether a GPU is available
+    if torch.cuda.is_available():
+        logger.info("GPU is available. Using CUDA.")
+        device = "cuda"
+    else:
+        logger.info("No GPU found. Using CPU.")
+        device = "cpu"
 
-    # Load the model with 8-bit quantization
+    # Set the options used to load the model
+    model_kwargs = {
+        "torch_dtype": torch.float32 if device == "cpu" else torch.float16,
+        "low_cpu_mem_usage": True,
+    }
+
+    if device == "cuda":
+        from transformers import BitsAndBytesConfig
+        model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
+
+    # Load the model
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        config=config,
-        device_map="auto",
-        torch_dtype=torch.float16,
+        device_map="auto" if device == "cuda" else None,
+        **model_kwargs
     )
-    logger.info("Model loaded successfully")
+
+    model.to(device)
+    logger.info(f"Model loaded successfully on {device}")
 except Exception as e:
     logger.error(f"Error loading model: {str(e)}")
     raise
@@ -45,7 +60,7 @@ async def webhook(query: Query):
         raise HTTPException(status_code=400, detail="No query text provided")
 
     prompt = f"Human: {user_query}\nAI:"
-    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
+    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
 
     with torch.no_grad():
         output = model.generate(input_ids, max_new_tokens=100, temperature=0.7)
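
A caveat on the new loading path: with device_map="auto", accelerate already places the weights, and transformers rejects .to() on 8-bit bitsandbytes models, so the unconditional model.to(device) can raise on the CUDA path. A minimal guarded sketch, assuming the device variable and loading code from the diff above:

# Sketch: only move the model explicitly on the CPU path. Models loaded
# with device_map="auto" are already dispatched by accelerate, and 8-bit
# bitsandbytes models do not support .to() at all.
if device == "cpu":
    model = model.to(device)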
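
In the generation hunk, temperature=0.7 is passed without enabling sampling, in which case generate() decodes greedily and ignores it. A minimal sketch of that request path with sampling enabled, plus the decode step the diff does not show; do_sample and the decode line are assumptions, not part of this commit:

prompt = f"Human: {user_query}\nAI:"
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
with torch.no_grad():
    output = model.generate(
        input_ids,
        max_new_tokens=100,
        temperature=0.7,
        do_sample=True,  # without this, temperature has no effect
    )
# Decode only the newly generated tokens, not the echoed prompt.
reply = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)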