Spaces:

hosseinhimself
/

ISANG-1.0-8B

Runtime error

App Files Files Community

hosseinhimself committed on Dec 27, 2024

Commit

4b4deff

verified ·

1 Parent(s): 1f688a3

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -7

app.py CHANGED Viewed

@@ -1,24 +1,40 @@
 import gradio as gr
 from unsloth import FastLanguageModel
 from transformers import AutoTokenizer, TextStreamer
-# Load the tokenizer
-model_name = "hosseinhimself/ISANG-v1.0-8B"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 # Initialize the model for faster inference using Unsloth
 model = FastLanguageModel.from_pretrained(model_name)
 FastLanguageModel.for_inference(model)  # Enable faster inference
-# Set hyperparameters for inference
 def generate_response(input_text, max_tokens=1024, temperature=0.7, history=[]):
-    # Prepare the inputs for the model
     prompt = f"Chat History: {history[-2:]}\nUser: {input_text}\nAI:"
     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
     # Set the max new tokens and temperature parameters for model generation
-    output = model.generate(**inputs, max_new_tokens=max_tokens, temperature=temperature)
     # Decode the model output and remove special tokens
     response = tokenizer.decode(output[0], skip_special_tokens=True)
@@ -36,12 +52,12 @@ iface = gr.Interface(
         gr.Textbox(label="Your Message", placeholder="Type your message here..."),
         gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max Tokens"),
         gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
-        gr.State(value=[])
     ],
     outputs=[gr.Textbox(label="AI Response"), gr.State()],
     title="ISANG Chatbot",
     description="A chatbot powered by ISANG-v1.0-8B model. Chat with me!",
-    theme="huggingface",
     live=True
 )

 import gradio as gr
 from unsloth import FastLanguageModel
 from transformers import AutoTokenizer, TextStreamer
+import torch
+# Load model and tokenizer
+model_name = "unsloth/Meta-Llama-3.1-8B"  # Replace with your model if needed
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 # Initialize the model for faster inference using Unsloth
 model = FastLanguageModel.from_pretrained(model_name)
 FastLanguageModel.for_inference(model)  # Enable faster inference
+# Define the Alpaca-style prompt template
+alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+### Instruction:
+You are ISANG, a multilingual large language model made by ISANG AI. You only respond in Persian, Korean, or English. If a user uses one of these languages, reply in the same language.
+### Input:
+{}
+### Response:
+{}"""
+EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN
+# Define a function to generate responses
 def generate_response(input_text, max_tokens=1024, temperature=0.7, history=[]):
+    # Prepare the inputs for the model with the history
     prompt = f"Chat History: {history[-2:]}\nUser: {input_text}\nAI:"
+    # Tokenize the input and prepare it for inference
     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
     # Set the max new tokens and temperature parameters for model generation
+    output = model.generate(**inputs, max_new_tokens=max_tokens, temperature=temperature, use_cache=True)
     # Decode the model output and remove special tokens
     response = tokenizer.decode(output[0], skip_special_tokens=True)
         gr.Textbox(label="Your Message", placeholder="Type your message here..."),
         gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max Tokens"),
         gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
+        gr.State(value=[])  # To maintain conversation history
     ],
     outputs=[gr.Textbox(label="AI Response"), gr.State()],
     title="ISANG Chatbot",
     description="A chatbot powered by ISANG-v1.0-8B model. Chat with me!",
+    theme="huggingface",  # Purple theme
     live=True
 )