hosseinhimself committed
Commit 393b5f6 · verified · 1 Parent(s): db77b63

Update app.py

Files changed (1):
  app.py  +53 -69
app.py CHANGED
@@ -1,76 +1,60 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from peft import PeftModel, PeftConfig
+import spaces
+import time
 
-# Define device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-# Load the model and tokenizer
 model_name = "hosseinhimself/ISANG-v1.0-8B"
-tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
-model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
-
-def chat_with_model(history, user_input):
-    """
-    Generate a response using the model, considering the last two interactions.
-
-    Parameters:
-    history (list of tuples): Conversation history as a list of (user, bot) pairs.
-    user_input (str): The latest user input.
-
-    Returns:
-    history (list of tuples): Updated conversation history.
-    """
-    # Use the last two interactions for context
-    context = ""
-    for user_message, bot_message in history[-2:]:
-        context += f"User: {user_message}\nBot: {bot_message}\n"
-
-    # Add the current user input
-    context += f"User: {user_input}\nBot:"
-
-    # Tokenize and generate a response
-    inputs = tokenizer(context, return_tensors="pt", truncation=True).to(device)
-    output = model.generate(inputs.input_ids, max_new_tokens=100)
-    bot_response = tokenizer.decode(output[0], skip_special_tokens=True)
-
-    # Extract only the bot's new response (to avoid repeating context)
-    bot_response = bot_response[len(context):].strip()
-
-    # Update the conversation history
-    history.append((user_input, bot_response))
-
-    return history
-
-def gradio_format(history):
-    """
-    Format the history for Gradio ChatInterface.
-
-    Parameters:
-    history (list of tuples): Conversation history as a list of (user, bot) pairs.
-
-    Returns:
-    List of dictionaries compatible with Gradio ChatInterface.
-    """
-    return [[user, bot] for user, bot in history]
-
-# Initialize empty history
-history = []
-
-def interface_function(user_input):
-    global history
-    history = chat_with_model(history, user_input)
-    return gradio_format(history)
-
-# Create Gradio interface
-chatbot = gr.ChatInterface(
-    fn=interface_function,
-    inputs=[gr.Textbox(lines=2, label="Your Input")],
-    outputs=[gr.Chatbot(label="Chat History")],
-    title="Persian Chatbot",
-    description="A chatbot that translates or responds to Persian prompts using ISANG-v1.0-8B model."
+base_model_name = "unsloth/Meta-Llama-3.1-8B"
+
+# Load tokenizer globally
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+@spaces.GPU
+def load_model():
+    try:
+        # Load the base model
+        base_model = AutoModelForCausalLM.from_pretrained(
+            base_model_name,
+            torch_dtype=torch.float16,
+            device_map="auto",
+            trust_remote_code=True,
+            low_cpu_mem_usage=True
+        )
+        # Load the PEFT model
+        model = PeftModel.from_pretrained(base_model, model_name)
+        print(f"Model loaded successfully. Using device: {model.device}")
+        return model
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        raise
+
+@spaces.GPU
+def generate_text(prompt):
+    model = load_model()
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_new_tokens=200, num_return_sequences=1, temperature=0.7)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response
+
+gradio_app = gr.Interface(
+    generate_text,
+    inputs=gr.Textbox(label="Enter your message", lines=3),
+    outputs=gr.Textbox(label="Chatbot Response"),
+    title="ISANG Chatbot",
+    description=f"""This is a simple chatbot powered by the ISANG model. It is fine-tuned from the {base_model_name} model.
+    Enter your message and see how the chatbot responds!""",
+    examples=[
+        ["سلام، چطوری؟"],  # "Hi, how are you?"
+        ["برام یه داستان تعریف کن"],  # "Tell me a story"
+        ["بهترین کتابی که خوندی چی بوده؟"],  # "What's the best book you've read?"
+        ["توی اوقات فراغتت چی کار می‌کنی؟"],  # "What do you do in your free time?"
+        ["نظرت درباره هوش مصنوعی چیه؟"]  # "What do you think about artificial intelligence?"
+    ]
 )
 
-# Launch the app
 if __name__ == "__main__":
-    chatbot.launch()
+    gradio_app.launch()
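
Note on the new loading path: the commit stops loading ISANG-v1.0-8B as a standalone checkpoint and instead loads it as a PEFT adapter on top of unsloth/Meta-Llama-3.1-8B, with GPU work gated behind ZeroGPU's @spaces.GPU decorator. Below is a minimal local sketch of the same pattern, not part of the committed app.py, assuming the same model IDs and a CUDA machine. The prompt-slicing step is an addition here: model.generate returns the prompt plus the completion, and temperature only takes effect when do_sample=True is passed.

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model_name = "unsloth/Meta-Llama-3.1-8B"
adapter_name = "hosseinhimself/ISANG-v1.0-8B"

tokenizer = AutoTokenizer.from_pretrained(adapter_name)
base = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Apply the PEFT adapter weights on top of the base model
model = PeftModel.from_pretrained(base, adapter_name)

prompt = "سلام، چطوری؟"  # "Hi, how are you?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,   # temperature is only applied when sampling
        temperature=0.7,
    )
# Decode only the newly generated tokens, not the echoed prompt
new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))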
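
Since the UI also changes from gr.ChatInterface to a single-turn gr.Interface, the Space exposes one /predict endpoint. A minimal sketch of calling it with gradio_client follows; the Space ID below is an assumption based on the repo name, not confirmed by this commit.

from gradio_client import Client

client = Client("hosseinhimself/ISANG-v1.0-8B")  # assumed Space ID
result = client.predict("سلام، چطوری؟", api_name="/predict")  # "Hi, how are you?"
print(result)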