Spaces: Running on Zero

Update app.py
app.py CHANGED
@@ -6,7 +6,7 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 import torch
 from threading import Thread
-
+import spaces
 
 # Load model directly
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -14,6 +14,7 @@ tokenizer = AutoTokenizer.from_pretrained("Navid-AI/Mulhem-1-Mini", token=os.get
 model = AutoModelForCausalLM.from_pretrained("Navid-AI/Mulhem-1-Mini", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", token=os.getenv("HF_TOKEN")).to(device)
 streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
+@spaces.GPU
 def respond(
     message,
     history: list[tuple[str, str]],
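The change is the standard ZeroGPU hookup: import the spaces package and decorate the GPU-bound entry point (here, respond) with @spaces.GPU, so a Space "Running on Zero" only holds a GPU while that function executes. Below is a minimal, self-contained sketch of the same pattern; the generate helper, its prompt and max_new_tokens parameters, and the draining of the streamer into one string are illustrative stand-ins for the app's actual respond logic, not the Space's real code.

import spaces
import torch
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

MODEL_ID = "Navid-AI/Mulhem-1-Mini"  # the model this Space serves

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16).to("cuda")

@spaces.GPU  # ZeroGPU attaches a GPU only for the duration of this call
def generate(prompt: str, max_new_tokens: int = 256) -> str:
    # hypothetical helper standing in for the app's respond function
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # run generation in a background thread so the streamer can be drained as tokens arrive
    Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens)).start()
    return "".join(streamer)

If a decorated call routinely needs more than the default GPU allocation window, the decorator also accepts a duration argument, e.g. @spaces.GPU(duration=120).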