Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -4,7 +4,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 
 model_name = "hosseinhimself/ISANG-v1.0-8B"
 
-#
+# Ensure CUDA is not used
 torch.set_default_device("cpu")
 
 # Load tokenizer globally
@@ -12,12 +12,12 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
 def load_model():
     try:
-        # Load the model
+        # Load the model without `bitsandbytes` or CUDA
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
-            torch_dtype=torch.float32, #
+            torch_dtype=torch.float32, # Use standard float32 for CPU
             trust_remote_code=True,
-            low_cpu_mem_usage=True #
+            low_cpu_mem_usage=True # Optimize for CPU
         )
         model.to("cpu") # Explicitly load the model on CPU
         print("Model loaded successfully on CPU.")
@@ -26,7 +26,7 @@ def load_model():
         print(f"Error loading model: {e}")
         raise
 
-def chat(prompt, history):
+def stream_chat(prompt, history):
     model = load_model()
     # Add system prompt
     system_prompt = "You are ISANG, a multilingual large language model made by ISANG AI. You only respond in Persian, Korean, or English. If a user uses one of these languages, reply in the same language."
@@ -37,26 +37,41 @@ def chat(prompt, history):
         context += f"User: {user_message}\nBot: {bot_message}\n"
     context += f"User: {prompt}\nBot:"
 
-    # Generate a response
+    # Generate a response incrementally
     inputs = tokenizer(context, return_tensors="pt", truncation=True, max_length=512)
-
-
-
+    output_ids = model.generate(
+        **inputs,
+        max_new_tokens=200,
+        temperature=0.7,
+        do_sample=True,
+        return_dict_in_generate=True,
+        output_scores=False
+    )
 
-
-
-    history.append((prompt, response))
-    return history, response
+    response_ids = output_ids.sequences[0]
+    decoded_text = tokenizer.decode(response_ids, skip_special_tokens=True)
 
-
-
+    # Stream response word by word
+    response = decoded_text[len(context):].strip()
+    words = response.split()
+    history.append((prompt, "")) # Add the prompt to history with an empty response initially
+    for i, word in enumerate(words):
+        # Append the next word to the history
+        history[-1] = (prompt, " ".join(words[: i + 1]))
+        yield history, " ".join(words[: i + 1]) # Stream the current response
+
+gradio_app = gr.Interface(
+    fn=stream_chat,
+    inputs=[gr.Textbox(lines=2, placeholder="Enter your message here..."), "state"],
+    outputs=["state", "text"],
     title="ISANG Chatbot",
     description="This is a chatbot powered by the ISANG model. Enter your messages to chat with it!",
     examples=[
         ["سلام، چطوری؟"],
         ["برام یه داستان تعریف کن"],
         ["نظرت درباره هوش مصنوعی چیه؟"]
-    ]
+    ],
+    live=True # Enable live streaming for Gradio
 )
 
 if __name__ == "__main__":
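
A caveat on the loading hunk: as far as I know, `low_cpu_mem_usage=True` is only honored when the `accelerate` package is installed, and `from_pretrained` raises an ImportError otherwise rather than falling back silently. A minimal guarded variant, sketched under that assumption (the `load_model_guarded` name and the retry are illustrative, not part of the commit):

import torch
from transformers import AutoModelForCausalLM

model_name = "hosseinhimself/ISANG-v1.0-8B"

def load_model_guarded():
    # Illustrative variant of load_model(): retry without the accelerate-only
    # flag instead of crashing the Space when the package is missing.
    kwargs = dict(torch_dtype=torch.float32, trust_remote_code=True)
    try:
        return AutoModelForCausalLM.from_pretrained(
            model_name, low_cpu_mem_usage=True, **kwargs
        )
    except ImportError:
        # low_cpu_mem_usage=True needs `accelerate`; plain loading does not
        return AutoModelForCausalLM.from_pretrained(model_name, **kwargs)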
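Independent of that, `stream_chat` still opens with `model = load_model()`, so the 8B checkpoint is reloaded on every message; on a CPU Space that alone can dominate response time and may contribute to the runtime error. A sketch of loading once and reusing the instance (the `get_model` helper is hypothetical, not in the commit):

_model = None  # filled on first use, then reused across requests

def get_model():
    # Hypothetical wrapper around load_model() from app.py: pay the load cost once.
    global _model
    if _model is None:
        _model = load_model()
    return _model

`stream_chat` would then call `get_model()` in place of `load_model()`.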
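The streaming in the new `stream_chat` is simulated: `generate` runs to completion before anything is yielded, and the finished text is then replayed word by word. Recovering the reply via `decoded_text[len(context):]` is also fragile, since it assumes decoding reproduces the prompt character for character. For genuine incremental output, `transformers` ships `TextIteratorStreamer`, which yields text as `generate` produces it from a worker thread; a sketch assuming the `model`, `tokenizer`, and `context` built in app.py:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_generate(model, tokenizer, context):
    # Yield the growing reply as tokens arrive, instead of waiting for the
    # full 200-token completion and splitting it afterwards.
    inputs = tokenizer(context, return_tensors="pt", truncation=True, max_length=512)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True  # omit the prompt echo
    )
    gen_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=200,
                      temperature=0.7, do_sample=True)
    Thread(target=model.generate, kwargs=gen_kwargs).start()  # generate() blocks
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial

`skip_prompt=True` also removes the need for the `len(context)` slice.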
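On the Gradio side, `live=True` re-runs the function on every input change rather than enabling streaming; generator functions already stream on submit. The `examples` entries also supply one value each while the interface declares two inputs (the textbox and `"state"`), which Gradio should reject at startup. `gr.ChatInterface` manages history itself and sidesteps both problems; one way this could be wired, assuming a Gradio version where history arrives as (user, bot) pairs and reusing the hypothetical `stream_generate` sketched above:

import gradio as gr

def chat_fn(message, history):
    # Rebuild the prompt from Gradio-managed history (system prompt omitted here)
    context = "".join(f"User: {u}\nBot: {b}\n" for u, b in history)
    context += f"User: {message}\nBot:"
    yield from stream_generate(model, tokenizer, context)  # partials render live

demo = gr.ChatInterface(
    fn=chat_fn,
    title="ISANG Chatbot",
    description="This is a chatbot powered by the ISANG model. Enter your messages to chat with it!",
    examples=["سلام، چطوری؟", "برام یه داستان تعریف کن", "نظرت درباره هوش مصنوعی چیه؟"],
)

if __name__ == "__main__":
    demo.launch()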