hosseinhimself commited on
Commit
02d455b
·
verified ·
1 Parent(s): 55cf123

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -2
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import spaces
@@ -61,4 +61,66 @@ gradio_app = gr.ChatInterface(
61
  )
62
 
63
  if __name__ == "__main__":
64
- gradio_app.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import spaces
 
61
  )
62
 
63
  if __name__ == "__main__":
64
+ gradio_app.launch()"""
65
+
66
+
67
+ import gradio as gr
68
+ import torch
69
+ from transformers import AutoTokenizer, AutoModelForCausalLM
70
+
71
# Hugging Face Hub id of the checkpoint served by this Space.
model_name = "hosseinhimself/ISANG-v1.0-8B"

# Load tokenizer globally (at import time) so every request reuses it.
# NOTE(review): trust_remote_code=True executes code shipped with the repo —
# acceptable only because the checkpoint is the author's own; confirm.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
75
+
76
def load_model():
    """Load the ISANG causal LM on CPU, caching it after the first call.

    Fix: the original re-ran ``from_pretrained`` on every invocation, and the
    chat handler calls this once per message — reloading a multi-GB checkpoint
    each turn. The loaded model is now memoized on the function object, so
    repeated calls return the same instance.

    Returns:
        The ``AutoModelForCausalLM`` instance, on CPU, in float32.

    Raises:
        Re-raises whatever ``from_pretrained`` raised, after logging it.
    """
    cached = getattr(load_model, "_model", None)
    if cached is not None:
        return cached
    try:
        # Load the model on CPU
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,  # Use float32 for CPU (no fp16 kernels)
            trust_remote_code=True,  # NOTE(review): runs repo code; trusted source assumed
            low_cpu_mem_usage=True
        )
        model.to("cpu")  # Explicitly set the model to CPU
        print("Model loaded successfully on CPU.")
        load_model._model = model  # memoize for subsequent calls
        return model
    except Exception as e:
        # Log and propagate — callers (the chat handler) cannot proceed
        # without a model, so swallowing the error would only hide it.
        print(f"Error loading model: {e}")
        raise
91
+
92
def chat(prompt, history):
    """Gradio ChatInterface callback: generate one reply to `prompt`.

    Args:
        prompt: The user's latest message.
        history: Prior turns as an iterable of (user_message, bot_message)
            pairs, supplied by gr.ChatInterface.

    Returns:
        str: The model's reply text.

    Fixes vs. original:
      * ``do_sample=True`` — without it ``model.generate`` decodes greedily
        and the ``temperature=0.7`` argument is silently ignored.
      * Returns only the reply string: gr.ChatInterface renders the callback's
        return value as the bot message, so the original ``(history, response)``
        tuple would be displayed verbatim. ChatInterface also manages history
        itself, so the manual ``history.append`` is dropped.
    """
    model = load_model()
    # Add system prompt
    system_prompt = "You are ISANG, a multilingual large language model made by ISANG AI. You only respond in Persian, Korean, or English. If a user uses one of these languages, reply in the same language."

    # Format the conversation history with system prompt
    context = f"System: {system_prompt}\n"
    for user_message, bot_message in history:
        context += f"User: {user_message}\nBot: {bot_message}\n"
    context += f"User: {prompt}\nBot:"

    # Generate a response
    inputs = tokenizer(context, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,  # required for temperature to take effect
            temperature=0.7,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract the latest reply by stripping the echoed prompt.
    # NOTE(review): this assumes the decoded output begins with `context`
    # verbatim; if truncation fired (context > 512 tokens) the slice can
    # misalign — confirm against real long conversations.
    response = response[len(context):].strip()
    return response
113
+
114
# Chat UI: gr.ChatInterface calls `fn` with (message, history) for each turn
# and renders the callback's return value as the bot reply.
gradio_app = gr.ChatInterface(
    fn=chat,
    title="ISANG Chatbot",
    description="This is a chatbot powered by the ISANG model. Enter your messages to chat with it!",
    # Clickable example prompts (Persian) shown under the input box.
    examples=[
        ["سلام، چطوری؟"],
        ["برام یه داستان تعریف کن"],
        ["نظرت درباره هوش مصنوعی چیه؟"]
    ]
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    gradio_app.launch()