import time

import gradio as gr
import torch
import transformers
from peft import PeftConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)


# 4-bit NF4 quantization with nested quantization and bfloat16 compute,
# so the Falcon base model fits in limited GPU memory.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

PEFT_MODEL = "cdy3870/Falcon-Fetch-Bot"

# Resolve the base model from the adapter config and load it quantized.
config = PeftConfig.from_pretrained(PEFT_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
    offload_folder="offload",
)
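# Falcon's tokenizer ships without a pad token; reuse EOS for padding.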
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

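# Attach the trained LoRA adapter weights on top of the base model.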
model = PeftModel.from_pretrained(model, PEFT_MODEL)

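# Decoding defaults; the UI sliders below update temperature and max_new_tokens.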
generation_config = model.generation_config
generation_config.max_new_tokens = 150
generation_config.temperature = 0.6
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

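# Wrap the adapted model and tokenizer in a text-generation pipeline.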
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

def main():

    with gr.Blocks() as demo:

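        # Slider callbacks mutate the shared generation_config in place.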
        def update_temp(temp):
            generation_config.temperature = temp

        def update_tokens(tokens):
            generation_config.max_new_tokens = tokens
        
        chatbot = gr.Chatbot(label="Fetch Rewards Chatbot")
        temperature = gr.Slider(0, 1, value=0.6, step=0.1, label="Creativity", interactive=True)
        temperature.change(fn=update_temp, inputs=temperature)

        tokens = gr.Slider(50, 200, value=150, step=50, label="Length", interactive=True)
        tokens.change(fn=update_tokens, inputs=tokens)

        msg = gr.Textbox(label="", placeholder="Ask anything about Fetch!")
        clear = gr.Button("Clear Log")

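        # user() appends the message to the history; bot() streams the reply.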
        def user(user_message, history):
            return "", history + [[user_message, None]]

        def bot(history):
            
            message = history[-1][0]
            # Prompt in the <human>/<assistant> format used during fine-tuning.
            prompt = f"<human>: {message}\n<assistant>:"
            
            result = pipeline(
                prompt,
                generation_config=generation_config,
            )
            # Keep only the first line of the assistant's completion,
            # dropping the echoed prompt and any leading whitespace.
            parsed_result = (
                result[0]["generated_text"]
                .split("<assistant>:")[1]
                .strip()
                .split("\n")[0]
            )
                
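            # Stream the reply one character at a time for a typing effect.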
            history[-1][1] = ""
            for character in parsed_result:
                history[-1][1] += character
                time.sleep(0.01)
                yield history

        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
            bot, chatbot, chatbot
        )
        clear.click(lambda: None, None, chatbot, queue=False)

    demo.queue()
    demo.launch()


if __name__ == "__main__":
    main()