import gradio as gr
import plotly.express as px
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BlenderbotForConditionalGeneration

# Check if CUDA is available and set device accordingly
device = "cuda" if torch.cuda.is_available() else "cpu"

# Configure CUDA memory allocation when a GPU is available
if device == "cuda":
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
    torch.cuda.empty_cache()
    torch.cuda.set_per_process_memory_fraction(0.8)  # Adjust the fraction as needed

# System message (placeholder, adjust as needed)
system_message = ""

# Load the CapybaraHermes-2.5 (Mistral-7B, AWQ-quantized) causal LM and its tokenizer (GPU-oriented)
def hermes_model():
    tokenizer = AutoTokenizer.from_pretrained("TheBloke/CapybaraHermes-2.5-Mistral-7B-AWQ")
    model = AutoModelForCausalLM.from_pretrained("TheBloke/CapybaraHermes-2.5-Mistral-7B-AWQ", low_cpu_mem_usage=True, device_map="auto")
    return model, tokenizer



# Load the lightweight Blenderbot chat model and its tokenizer (CPU-friendly default)
def blender_model():
    model = BlenderbotForConditionalGeneration.from_pretrained("facebook/blenderbot-400M-distill")
    tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
    return model, tokenizer

model, tokenizer = blender_model()
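# Note: hermes_model() above is defined but not wired in by default. A possible swap
# (untested sketch; assumes a CUDA GPU with enough memory and an AWQ-capable
# transformers install) would be:
# model, tokenizer = hermes_model()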

def chat_response(msg_prompt: str) -> str:
    """Generates a single-turn reply from the currently loaded model for the given prompt."""
    try:
        inputs = tokenizer(msg_prompt, return_tensors="pt")
        reply_ids = model.generate(**inputs)
        return tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
    except Exception as e:
        return str(e)
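# Quick sanity check (sketch, not part of the app flow): calling chat_response directly,
# e.g. print(chat_response("Hello, how are you?")), should print a short Blenderbot reply,
# or the exception text if generation fails.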


# Generate a response via a text-generation pipeline; meant for the Hermes causal LM
# loaded by hermes_model(), not the Blenderbot seq2seq model loaded by default above
def chat_responses(msg_prompt: str) -> str:
    """
    Generates a response from the model given a prompt.

    Args:
        msg_prompt (str): The user's message prompt.

    Returns:
        str: The model's response.
    """
    generation_params = {
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.95,
        "top_k": 40,
        "max_new_tokens": 512,
        "repetition_penalty": 1.1,
    }
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, **generation_params)
    try:
        prompt_template = f'''<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{msg_prompt}<|im_end|>
<|im_start|>assistant
'''
        pipe_output = pipe(prompt_template)[0]['generated_text']
        
        # Separate assistant's response from the output
        response_lines = pipe_output.split('assistant')
        assistant_response = response_lines[-1].strip() if len(response_lines) > 1 else pipe_output.strip()

        return assistant_response
    except Exception as e:
        return str(e)
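# To route the chat UI through the Hermes pipeline instead of Blenderbot, one option
# (untested sketch) is to load hermes_model() above and have bot() call chat_responses()
# in place of chat_response(); the ChatML template above follows the prompt format
# documented for CapybaraHermes-2.5.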

# Function to generate a random plot
def random_plot():
    df = px.data.iris()
    fig = px.scatter(df, x="sepal_width", y="sepal_length", color="species",
                     size='petal_length', hover_data=['petal_width'])
    return fig

# Function to handle likes/dislikes (for demonstration purposes)
def print_like_dislike(x: gr.LikeData):
    print(x.index, x.value, x.liked)

# Function to add the user's message and any uploaded files to the chat history
def add_message(history, message, files):
    if files:
        for file in files:
            history.append([(file,), None])  # files are rendered from (filepath,) tuples
    if message:
        history.append([message, None])  # lists (not tuples) so bot() can fill in the reply
    return history, gr.update(value=None, interactive=True)

# Function that fills in the bot reply for the latest user turn
def bot(history):
    if history and isinstance(history[-1][0], str):  # skip file-only turns
        user_message = history[-1][0]
        history[-1][1] = chat_response(user_message)
    return history

fig = random_plot()

# Gradio interface setup
with gr.Blocks(fill_height=True) as demo:
    chatbot = gr.Chatbot(elem_id="chatbot", bubble_full_width=False, scale=1)

    with gr.Row():
        chat_input = gr.Textbox(placeholder="Enter message...", show_label=False)
        file_input = gr.File(label="Upload file(s)", file_count="multiple")

    chat_msg = chat_input.submit(add_message, [chatbot, chat_input, file_input], [chatbot, chat_input])
    bot_msg = chat_msg.then(bot, chatbot, chatbot)
    bot_msg.then(lambda: gr.update(interactive=True), None, [chat_input])

    chatbot.like(print_like_dislike, None, None)

demo.queue()
demo.launch()