GoidaAlignment committed on
Commit af7afe0 · verified · 1 Parent(s): 2102496

Update app.py

Files changed (1):
  1. app.py +25 -137
app.py CHANGED
@@ -1,142 +1,30 @@
-import os
-import time
-#import spaces
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import gradio as gr
-from threading import Thread
-
-MODEL_LIST = ["GoidaAlignment/GOIDA-0.5B"]
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
-
-TITLE = "<h1><center>Я СКАЗАЛ ГОООЙДА!</center></h1>"
-
-PLACEHOLDER = """
-<center>
-<p>ГООООЙДА!!</p>
-</center>
-"""
-
-# pip install transformers
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-device = "cpu"  # "cuda" for GPU usage or "cpu" for CPU usage
-
-tokenizer = AutoTokenizer.from_pretrained(MODEL_LIST[0])
-model = AutoModelForCausalLM.from_pretrained(MODEL_LIST[0]).to(device)
-
-
-#@spaces.GPU()
-def stream_chat(
-    message: str,
-    history: list,
-    temperature: float = 0.4,
-    max_new_tokens: int = 1024,
-    top_p: float = 1.0,
-    top_k: int = 20,
-    penalty: float = 1.2,
-    choice: str = "GoidaAlignment/GOIDA-0.5B"
-):
-    print(f'message: {message}')
-    print(f'history: {history}')
-
-    conversation = []
-    for prompt, answer in history:
-        conversation.extend([
-            {"role": "user", "content": prompt},
-            {"role": "assistant", "content": answer},
-        ])
-
-    conversation.append({"role": "user", "content": message})
-
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+# Load the tokenizer and the model
+model_name = "GoidaAlignment/GOIDA-0.5B"  # Specify the path to your model
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+
+def generate_response(prompt):
+    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
+    outputs = model.generate(inputs["input_ids"], max_length=200, num_return_sequences=1)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response
+
+# Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# Введите запрос, и модель ответит.")

-
-    input_text = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
-    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
-    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
+    with gr.Row():
+        with gr.Column():
+            prompt_input = gr.Textbox(label="Ваш запрос", lines=4, placeholder="Введите текст")
+        with gr.Column():
+            output = gr.Textbox(label="Ответ модели", lines=6, interactive=False)

-    generate_kwargs = dict(
-        input_ids=inputs,
-        max_new_tokens=max_new_tokens,
-        do_sample=False if temperature == 0 else True,
-        top_p=top_p,
-        top_k=top_k,
-        temperature=temperature,
-        streamer=streamer,
-    )
-
-    with torch.no_grad():
-        thread = Thread(target=model.generate, kwargs=generate_kwargs)
-        thread.start()
-
-    buffer = ""
-    for new_text in streamer:
-        buffer += new_text
-        yield buffer
-
-
-    #print(tokenizer.decode(outputs[0]))
-
-chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
-
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    gr.HTML(TITLE)
-    gr.ChatInterface(
-        fn=stream_chat,
-        chatbot=chatbot,
-        fill_height=True,
-        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
-        additional_inputs=[
-            gr.Slider(
-                minimum=0,
-                maximum=1,
-                step=0.1,
-                value=0.4,
-                label="Temperature",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=128,
-                maximum=8192,
-                step=1,
-                value=1024,
-                label="Max new tokens",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0.0,
-                maximum=1.0,
-                step=0.1,
-                value=1.0,
-                label="top_p",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=1,
-                maximum=20,
-                step=1,
-                value=20,
-                label="top_k",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0.0,
-                maximum=2.0,
-                step=0.1,
-                value=1.2,
-                label="Repetition penalty",
-                render=False,
-            ),
-            gr.Radio(
-                ["GoidaAlignment/GOIDA-0.5B"],
-                value="494M",
-                label="Load Model",
-                render=False,
-            ),
-        ],
-        cache_examples=False,
-    )
+    submit_button = gr.Button("Сгенерировать")
+    submit_button.click(generate_response, inputs=prompt_input, outputs=output)

+# Run the application
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
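
Review note on the new generate_response: max_length=200 budgets prompt plus completion together, so a long prompt crowds out the reply; only input_ids reaches model.generate, so the attention mask the tokenizer produced is dropped; and decoding outputs[0] echoes the prompt back in the answer. A minimal sketch of a variant that avoids these pitfalls, assuming the tokenizer may ship without a pad token; this is an illustration, not part of the commit:

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "GoidaAlignment/GOIDA-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Many causal-LM tokenizers define no pad token; reuse EOS so padding=True works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def generate_response(prompt):
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    outputs = model.generate(
        **inputs,                    # forwards input_ids and attention_mask together
        max_new_tokens=200,          # counts only generated tokens, not the prompt
        pad_token_id=tokenizer.pad_token_id,
    )
    # Decode only the tokens that come after the prompt.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

With max_length=200 as committed, a 150-token prompt leaves at most 50 tokens for the answer; max_new_tokens keeps the reply budget fixed regardless of prompt length.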
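
The removed stream_chat also wrapped each message with tokenizer.apply_chat_template before generating, while the new handler feeds the raw prompt straight to the model. If GOIDA-0.5B expects its chat format (the removed code implies the tokenizer ships a template), a hypothetical sketch of restoring that step in the simplified app, reusing the tokenizer and model loaded above; generate_chat_response is an illustrative name, not in the commit:

def generate_chat_response(message):
    # Format the user turn with the model's chat template, as the removed
    # stream_chat() did, and append the generation prompt for the assistant.
    conversation = [{"role": "user", "content": message}]
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    outputs = model.generate(input_ids, max_new_tokens=200)
    # Return only the newly generated tokens, without the templated prompt.
    return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)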