import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer
def load_model():
    model_name = "hosseinhimself/ISANG-v1.0-8B"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
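    # Note: float32 weights for an 8B-parameter model take roughly 32 GB of
    # memory. torch.bfloat16 with device_map="auto" (requires the `accelerate`
    # package) is a common lighter-weight alternative.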
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
    return tokenizer, model

tokenizer, model = load_model()

# Define the inference function
def chat(input_text, max_tokens, temperature, history):
    """Generate one assistant turn, keeping history as a plain-text transcript."""
    try:
        # Prepend the conversation history to the new user turn
        if history:
            input_text = history + "\nUser: " + input_text + "\nAssistant:"
        else:
            input_text = "User: " + input_text + "\nAssistant:"

        # Tokenize with an attention mask to avoid generation warnings
        inputs = tokenizer(input_text, return_tensors="pt")

        # Generate the response; max_new_tokens bounds only the new reply.
        # Sliders return floats, so max_tokens is cast to int.
        output = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=int(max_tokens),
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

        output_text = tokenizer.decode(output[0], skip_special_tokens=True)

        # Extract the assistant's latest reply; rfind() is needed because the
        # history may already contain earlier "Assistant:" turns
        response_start = output_text.rfind("Assistant:") + len("Assistant:")
        response = output_text[response_start:].strip()
        new_history = input_text + " " + response

        return response, new_history

    except Exception as e:
        # Report the error without discarding the conversation history
        return f"Error: {e}", history

# Gradio interface
def reset_history():
    return ""

with gr.Blocks() as demo:
    gr.Markdown("# ISANG-v1.0-8B Chatbot")

    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(label="Your Input", placeholder="Type your message here...")
            max_tokens = gr.Slider(minimum=10, maximum=512, value=256, step=1, label="Max Tokens")
            temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature")
            history = gr.Textbox(label="Conversation History", value="", visible=False)
            send_button = gr.Button("Send")
            clear_button = gr.Button("Clear History")

        with gr.Column():
            chat_output = gr.Textbox(label="Assistant's Response", lines=10)

    send_button.click(
        chat, 
        inputs=[user_input, max_tokens, temperature, history], 
        outputs=[chat_output, history]
    )

    clear_button.click(reset_history, outputs=[history])

if __name__ == "__main__":
    # enable_queue was removed from launch() in Gradio 4.x; call queue() instead
    demo.queue()
    demo.launch(share=True)
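# Running `python app.py` serves the demo locally; share=True additionally
# requests a temporary public gradio.live link (assumes outbound network access).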