# ISANG-1.0-8B / app.py
# Hugging Face Space source file (2.43 kB).
# Last commit: fa5cabf "Update app.py" by hosseinhimself (verified).
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Load the model and tokenizer
def load_model(model_name="hosseinhimself/ISANG-v1.0-8B", torch_dtype=torch.float32):
    """Load the tokenizer and causal language model used by the chatbot.

    Args:
        model_name: Identifier handed to ``from_pretrained`` for both the
            tokenizer and the model. Defaults to the ISANG v1.0 8B checkpoint,
            so calling ``load_model()`` with no arguments behaves as before.
        torch_dtype: Dtype forwarded to
            ``AutoModelForCausalLM.from_pretrained``. Defaults to
            ``torch.float32``, the value that was previously hard-coded.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch_dtype)
    return tokenizer, model
# Load once at import time; `chat` reads these module-level names on every call.
tokenizer, model = load_model()
# Define the inference function
def chat(input_text, max_tokens, temperature, history):
    """Generate the assistant's reply to one user message.

    The prompt is a plain-text transcript: the prior ``history`` (if any),
    then a ``User:`` line holding the new message, then an ``Assistant:`` cue
    for the model to complete.

    Args:
        input_text: The user's new message.
        max_tokens: Upper bound on the number of newly generated tokens. It is
            cast to ``int`` because the UI value may not be an integer
            (NOTE(review): depends on the slider's step -- confirm).
        temperature: Sampling temperature passed to ``model.generate``.
        history: Transcript returned as ``new_history`` by an earlier call, or
            an empty string for a fresh conversation.

    Returns:
        A ``(response, new_history)`` tuple. If generation fails, ``response``
        is the exception text and the incoming ``history`` is returned
        unchanged so one failed request does not erase the conversation.
    """
    history = history or ""

    # Nothing to answer: skip generation and leave the conversation untouched.
    if not input_text or not input_text.strip():
        return "Please enter a message.", history

    turn = "User: " + input_text + "\nAssistant:"
    prompt = history + "\n" + turn if history else turn

    try:
        inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
        output = model.generate(
            inputs,
            # Bounds only the generated part, independent of prompt length.
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            do_sample=True,
        )
        # The output sequence starts with the prompt tokens. Decode only what
        # follows them: searching the decoded text for "Assistant:" would hit
        # the first turn in the history rather than the newest one.
        generated = output[0][inputs.shape[1]:]
        response = tokenizer.decode(generated, skip_special_tokens=True)
    except Exception as e:
        # UI boundary: surface the error text to the user, keep the history.
        return str(e), history

    # If the model continues the transcript with an invented user turn, drop
    # it so it is neither shown nor fed back in through the history.
    response = response.split("\nUser:", 1)[0].strip()
    new_history = prompt + " " + response
    return response, new_history
# Gradio interface
def reset_history():
    """Return the empty string that replaces the stored conversation history."""
    cleared = ""
    return cleared
with gr.Blocks() as demo:
    # Page title.
    gr.Markdown("# ISANG-v1.0-8B Chatbot")

    with gr.Row():
        # First column: message box, generation settings and buttons.
        with gr.Column():
            user_input = gr.Textbox(
                label="Your Input",
                placeholder="Type your message here...",
            )
            max_tokens = gr.Slider(
                minimum=10,
                maximum=512,
                value=256,
                label="Max Tokens",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                label="Temperature",
            )
            # Hidden textbox that carries the transcript between calls.
            history = gr.Textbox(
                label="Conversation History",
                value="",
                visible=False,
            )
            send_button = gr.Button("Send")
            clear_button = gr.Button("Clear History")

        # Second column: the model's latest reply.
        with gr.Column():
            chat_output = gr.Textbox(label="Assistant's Response", lines=10)

    # "Send" runs `chat`; its two return values fill the reply box and the
    # hidden history box.
    send_button.click(
        fn=chat,
        inputs=[user_input, max_tokens, temperature, history],
        outputs=[chat_output, history],
    )

    # "Clear History" overwrites the hidden history with reset_history()'s value.
    clear_button.click(fn=reset_history, outputs=[history])
if __name__ == "__main__":
    # `launch(enable_queue=...)` was deprecated in Gradio 3 and removed in
    # Gradio 4, where passing it raises TypeError. `queue()` is the supported
    # way to enable request queuing and exists in both major versions.
    demo.queue()
    demo.launch(share=True)