import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer
def load_model():
    model_name = "hosseinhimself/ISANG-v1.0-8B"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
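    # Note: float32 weights for an 8B-parameter model take roughly 32 GB of
    # memory. torch.bfloat16 with device_map="auto" (requires the `accelerate`
    # package) is a common lighter-weight alternative.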
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
    return tokenizer, model

tokenizer, model = load_model()

# Define the inference function
def chat(input_text, max_tokens, temperature, history):
    """Generate one assistant turn, keeping history as a plain-text transcript."""
    try:
        # Prepend the conversation history to the new user turn
        if history:
            input_text = history + "\nUser: " + input_text + "\nAssistant:"
        else:
            input_text = "User: " + input_text + "\nAssistant:"

        # Tokenize with an attention mask to avoid generation warnings
        inputs = tokenizer(input_text, return_tensors="pt")

        # Generate the response; max_new_tokens bounds only the new reply.
        # Sliders return floats, so max_tokens is cast to int.
        output = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=int(max_tokens),
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

        output_text = tokenizer.decode(output[0], skip_special_tokens=True)

        # Extract the assistant's latest reply; rfind() is needed because the
        # history may already contain earlier "Assistant:" turns
        response_start = output_text.rfind("Assistant:") + len("Assistant:")
        response = output_text[response_start:].strip()
        new_history = input_text + " " + response

        return response, new_history

    except Exception as e:
        # Report the error without discarding the conversation history
        return f"Error: {e}", history

# Gradio interface
def reset_history():
    return ""

with gr.Blocks() as demo:
    gr.Markdown("# ISANG-v1.0-8B Chatbot")

    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(label="Your Input", placeholder="Type your message here...")
            max_tokens = gr.Slider(minimum=10, maximum=512, value=256, step=1, label="Max Tokens")
            temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature")
            history = gr.Textbox(label="Conversation History", value="", visible=False)
            send_button = gr.Button("Send")
            clear_button = gr.Button("Clear History")

        with gr.Column():
            chat_output = gr.Textbox(label="Assistant's Response", lines=10)

    send_button.click(
        chat, 
        inputs=[user_input, max_tokens, temperature, history], 
        outputs=[chat_output, history]
    )

    clear_button.click(reset_history, outputs=[history])

if __name__ == "__main__":
    # enable_queue was removed from launch() in Gradio 4.x; call queue() instead
    demo.queue()
    demo.launch(share=True)
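# Running `python app.py` serves the demo locally; share=True additionally
# requests a temporary public gradio.live link (assumes outbound network access).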