Update app.py
app.py CHANGED
@@ -8,8 +8,6 @@ device = torch.device("cpu") # Ensure it's using CPU only
 # Load model and tokenizer
 model_name = "hosseinhimself/ISANG-v1.0-8B" # Replace with your model name
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
-
-# Load the model for inference on CPU
 model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
 
 # Define the Alpaca-style prompt template
@@ -24,23 +22,29 @@ You are ISANG, a multilingual large language model made by ISANG AI. You only re
 ### Response:
 {}"""
 
-
-
-# Define a function to generate responses
+# Function to generate responses
 def generate_response(input_text, max_tokens=1024, temperature=0.7, history=[]):
-    #
-
+    # Retain only the last two exchanges for context
+    if len(history) > 2:
+        history = history[-2:]
+
+    # Format the prompt
+    prompt = "\n".join(history + [f"User: {input_text}\nAI:"])
 
-    # Tokenize the input
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    # Tokenize the input
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
-    #
-    output = model.generate(
+    # Generate model output
+    output = model.generate(
+        inputs.input_ids,
+        max_new_tokens=max_tokens,
+        temperature=temperature
+    )
 
-    # Decode the model output
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    # Decode the model output
+    response = tokenizer.decode(output[0], skip_special_tokens=True).strip()
 
-    # Update
+    # Update the history
     history.append(f"User: {input_text}")
     history.append(f"AI: {response}")
 
@@ -59,7 +63,7 @@ iface = gr.Interface(
     title="ISANG Chatbot",
     description="A chatbot powered by ISANG-v1.0-8B model. Chat with me!",
     theme="huggingface", # Purple theme
-    live=
+    live=False # Set to False since live updates aren't required
 )
 
 # Launch the interface
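
For reference, here is a minimal usage sketch of the updated generate_response (not part of the commit). It assumes the model and tokenizer have loaded as in the diff above, and that the function returns the decoded response string; the actual return statement sits outside the hunks shown. Note also that history = history[-2:] rebinds the local name, so the caller's list is not trimmed in place.

# Hypothetical usage sketch; names are taken from the diff, and the
# return value is assumed to be the decoded `response` string.
history = []

# First exchange: history is empty, so the prompt is just "User: ...\nAI:"
print(generate_response("Hello! Who are you?", history=history))

# Follow-up exchange: at most the last two stored entries are joined into
# the prompt, which keeps the prompt length bounded.
print(generate_response("What languages do you speak?", max_tokens=256, history=history))

Capping the prompt at the last user/AI pair is a pragmatic choice for an 8B model pinned to CPU, where long contexts make generation very slow; for the same reason, live=False avoids re-running generation on every input change.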
|