Spaces: Runtime error

Update app.py
app.py CHANGED
@@ -3,8 +3,8 @@ from pydantic import BaseModel
 from typing import Optional
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
-from transformers import BitsAndBytesConfig  # add this import
 import logging
+import os
 
 # set up logging
 logging.basicConfig(level=logging.INFO)
@@ -16,18 +16,33 @@ try:
     model_name = "scb10x/llama-3-typhoon-v1.5-8b-instruct"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-    # …
-    …
-    …
+    # check whether a GPU is available
+    if torch.cuda.is_available():
+        logger.info("GPU is available. Using CUDA.")
+        device = "cuda"
+    else:
+        logger.info("No GPU found. Using CPU.")
+        device = "cpu"
 
-    # …
+    # settings for loading the model
+    model_kwargs = {
+        "torch_dtype": torch.float32 if device == "cpu" else torch.float16,
+        "low_cpu_mem_usage": True,
+    }
+
+    if device == "cuda":
+        from transformers import BitsAndBytesConfig
+        model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
+
+    # load the model
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        …
-        …
-        torch_dtype=torch.float16,
+        device_map="auto" if device == "cuda" else None,
+        **model_kwargs
     )
-    …
+
+    model.to(device)
+    logger.info(f"Model loaded successfully on {device}")
 except Exception as e:
     logger.error(f"Error loading model: {str(e)}")
     raise
@@ -45,7 +60,7 @@ async def webhook(query: Query):
         raise HTTPException(status_code=400, detail="No query text provided")
 
     prompt = f"Human: {user_query}\nAI:"
-    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(…)
+    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
 
     with torch.no_grad():
         output = model.generate(input_ids, max_new_tokens=100, temperature=0.7)
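
A note on the loading strategy above: the per-device kwargs track the model's memory footprint (8B parameters come to roughly 32 GB in float32, 16 GB in float16, and 8 GB with 8-bit quantization). One caveat, assuming recent transformers/bitsandbytes behavior: with device_map="auto" the weights are already placed by accelerate, and calling .to() on an 8-bit-quantized model raises a ValueError, so the unconditional model.to(device) added here can itself fail on GPU. A minimal guarded sketch:

# Sketch only; assumes transformers versions that reject .to() on
# bitsandbytes-quantized models.
if device == "cpu":
    model.to(device)  # explicit move only on CPU; on CUDA, device_map="auto" already placed the weights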
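
A quick sanity check of the generation path, mirroring the handler code; note that temperature only takes effect when sampling is enabled, which the generate() call in the diff does not request. A minimal sketch, assuming the model and tokenizer loaded as above:

# Hypothetical standalone check, run outside the FastAPI handler.
prompt = "Human: สวัสดี\nAI:"
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
with torch.no_grad():
    output = model.generate(
        input_ids,
        max_new_tokens=100,
        temperature=0.7,
        do_sample=True,  # without this, greedy decoding ignores temperature
    )
# Decode only the newly generated tokens, not the echoed prompt.
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))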
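
For completeness, a hypothetical client call. The route path and the Query model's field name are outside this diff, so both are assumptions here:

# Assumes the handler is mounted at POST /webhook and that Query exposes a
# "query" field (both hypothetical); 7860 is the default port for a Space.
import requests

resp = requests.post(
    "http://localhost:7860/webhook",
    json={"query": "สวัสดีครับ"},  # assumed field name
)
print(resp.json())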