ginipick committed on
Commit
9531afd
1 Parent(s): 2f00662

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -141
app.py CHANGED
@@ -1,42 +1,15 @@
1
  import os
2
- import time
3
- import spaces
 
4
  import torch
5
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
- import gradio as gr
7
- from threading import Thread
8
 
9
- MODEL_LIST = ["LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"]
10
- HF_TOKEN = os.environ.get("HF_TOKEN", None)
11
- MODEL = os.environ.get("MODEL_ID")
12
 
13
- TITLE = """
14
- <h1><center>EXAONE-3.0-7.8B-Instruct</center></h1>
15
- <center>
16
- <p>The model is licensed under EXAONE AI Model License Agreement 1.0 - NC</p>
17
- </center>
18
- """
19
-
20
- PLACEHOLDER = """
21
- <center>
22
- <p>EXAONE-3.0-7.8B-Instruct is a pre-trained and instruction-tuned bilingual (English and Korean) generative model with 7.8 billion parameters</p>
23
- </center>
24
- """
25
-
26
-
27
- CSS = """
28
- .duplicate-button {
29
- margin: auto !important;
30
- color: white !important;
31
- background: black !important;
32
- border-radius: 100vh !important;
33
- }
34
- h3 {
35
- text-align: center;
36
- }
37
- """
38
-
39
- device = "cuda" # for GPU usage or "cpu" for CPU usage
40
 
41
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
42
  model = AutoModelForCausalLM.from_pretrained(
@@ -44,29 +17,20 @@ model = AutoModelForCausalLM.from_pretrained(
44
  torch_dtype=torch.bfloat16,
45
  device_map="auto",
46
  trust_remote_code=True,
47
- ignore_mismatched_sizes=True)
 
48
 
49
- @spaces.GPU()
50
- def stream_chat(
51
- message: str,
52
- history: list,
53
- system_prompt: str,
54
- temperature: float = 0.3,
55
- max_new_tokens: int = 256,
56
- top_p: float = 1.0,
57
- top_k: int = 20,
58
- penalty: float = 1.2,
59
- ):
60
- print(f'message: {message}')
61
- print(f'history: {history}')
62
 
 
63
  conversation = [{"role": "system", "content": system_prompt}]
64
  for prompt, answer in history:
65
  conversation.extend([
66
- {"role": "user", "content": prompt},
67
  {"role": "assistant", "content": answer},
68
  ])
69
-
70
  conversation.append({"role": "user", "content": message})
71
 
72
  inputs = tokenizer.apply_chat_template(
@@ -75,97 +39,43 @@ def stream_chat(
75
  add_generation_prompt=True,
76
  return_tensors="pt"
77
  ).to(device)
78
-
79
- streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
80
-
81
- generate_kwargs = dict(
82
- input_ids=inputs,
83
- max_new_tokens = max_new_tokens,
84
- do_sample = False if temperature == 0 else True,
85
- top_p = top_p,
86
- top_k = top_k,
87
- temperature = temperature,
88
- streamer=streamer,
89
- pad_token_id = 0,
90
- eos_token_id = 361 # 361
91
- )
92
 
93
  with torch.no_grad():
94
- thread = Thread(target=model.generate, kwargs=generate_kwargs)
95
- thread.start()
96
-
97
- buffer = ""
98
- for new_text in streamer:
99
- buffer += new_text
100
- yield buffer
101
-
102
-
103
- chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
104
-
105
- with gr.Blocks(css=CSS, theme="soft") as demo:
106
- gr.HTML(TITLE)
107
- gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
108
- gr.ChatInterface(
109
- fn=stream_chat,
110
- chatbot=chatbot,
111
- fill_height=True,
112
- additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
113
- additional_inputs=[
114
- gr.Textbox(
115
- value="You are EXAONE model from LG AI Research, a helpful assistant.",
116
- label="System Prompt",
117
- render=False,
118
- ),
119
- gr.Slider(
120
- minimum=0,
121
- maximum=1,
122
- step=0.1,
123
- value=1,
124
- label="Temperature",
125
- render=False,
126
- ),
127
- gr.Slider(
128
- minimum=128,
129
- maximum=4096,
130
- step=1,
131
- value=1024,
132
- label="Max new tokens",
133
- render=False,
134
- ),
135
- gr.Slider(
136
- minimum=0.0,
137
- maximum=1.0,
138
- step=0.1,
139
- value=1.0,
140
- label="top_p",
141
- render=False,
142
- ),
143
- gr.Slider(
144
- minimum=1,
145
- maximum=50,
146
- step=1,
147
- value=50,
148
- label="top_k",
149
- render=False,
150
- ),
151
- gr.Slider(
152
- minimum=0.0,
153
- maximum=2.0,
154
- step=0.1,
155
- value=1.0,
156
- label="Repetition penalty",
157
- render=False,
158
- ),
159
- ],
160
- examples=[
161
- ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
162
- ["What are 5 creative things I could do with my kids' art? I don't want to throw them away, but it's also so much clutter."],
163
- ["Explain who you are"],
164
- ["너의 소원을 말해봐"],
165
- ],
166
- cache_examples=False,
167
- )
168
-
169
 
170
  if __name__ == "__main__":
171
- demo.launch()
 
 
 
1
  import os
2
+ import asyncio
3
+ import discord
4
+ from discord.ext import commands
5
  import torch
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
7
 
8
# Hugging Face model identifier used for both the tokenizer and the model.
MODEL = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"

# Bot credentials and target channel are read from the process environment.
DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")

# int(None) would fail with an opaque TypeError, so report the missing
# variable by name instead.
_raw_channel_id = os.getenv("DISCORD_CHANNEL_ID")
if _raw_channel_id is None:
    raise RuntimeError("DISCORD_CHANNEL_ID environment variable is not set")
DISCORD_CHANNEL_ID = int(_raw_channel_id)

# Use the GPU when torch reports CUDA as available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
15
  model = AutoModelForCausalLM.from_pretrained(
 
17
  torch_dtype=torch.bfloat16,
18
  device_map="auto",
19
  trust_remote_code=True,
20
+ ignore_mismatched_sizes=True
21
+ )
22
 
23
# Start from the library's default intents and additionally request message
# content, which on_message reads through message.content.
intents = discord.Intents.default()
intents.message_content = True

# Bot instance that the event handlers in this file are registered on.
bot = commands.Bot(
    command_prefix="!",
    intents=intents,
)
 
 
 
 
 
 
 
 
 
 
26
 
27
+ async def generate_response(message, history, system_prompt):
28
  conversation = [{"role": "system", "content": system_prompt}]
29
  for prompt, answer in history:
30
  conversation.extend([
31
+ {"role": "user", "content": prompt},
32
  {"role": "assistant", "content": answer},
33
  ])
 
34
  conversation.append({"role": "user", "content": message})
35
 
36
  inputs = tokenizer.apply_chat_template(
 
39
  add_generation_prompt=True,
40
  return_tensors="pt"
41
  ).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  with torch.no_grad():
44
+ output = model.generate(
45
+ inputs,
46
+ max_new_tokens=1024,
47
+ do_sample=True,
48
+ top_p=1.0,
49
+ top_k=50,
50
+ temperature=1.0,
51
+ pad_token_id=0,
52
+ eos_token_id=361
53
+ )
54
+
55
+ response = tokenizer.decode(output[0], skip_special_tokens=True)
56
+ return response.split("Assistant:")[-1].strip()
57
+
58
@bot.event
async def on_ready():
    """Print a confirmation line naming the bot user.

    Registered on ``bot`` as its ``on_ready`` event handler.
    """
    banner = f"{bot.user} has connected to Discord!"
    print(banner)
61
+
62
# Maximum number of characters sent in a single Discord message.
DISCORD_MESSAGE_LIMIT = 2000


@bot.event
async def on_message(message):
    """Reply to a message in the configured channel with a model response.

    Messages written by the bot itself, messages from any channel other than
    ``DISCORD_CHANNEL_ID``, and messages without text content are ignored.
    The reply is generated with an empty history and a fixed system prompt,
    then sent back in pieces of at most ``DISCORD_MESSAGE_LIMIT`` characters.

    Args:
        message: The incoming message object passed in by the bot framework.
    """
    if message.author == bot.user:
        return

    if message.channel.id != DISCORD_CHANNEL_ID:
        return

    # A blank message (for example one with no text at all) gives the model
    # nothing to answer.
    if not message.content.strip():
        return

    # Generation can take a while; show the typing indicator meanwhile.
    # NOTE(review): generate_response is awaited inline. If it runs the model
    # synchronously, the event loop is blocked for the whole generation;
    # consider offloading that work to a thread.
    async with message.channel.typing():
        response = await generate_response(
            message.content,
            [],
            "You are EXAONE model from LG AI Research, a helpful assistant.",
        )

    # Split the response into chunks of at most DISCORD_MESSAGE_LIMIT characters.
    for start in range(0, len(response), DISCORD_MESSAGE_LIMIT):
        chunk = response[start:start + DISCORD_MESSAGE_LIMIT]
        # A slice made only of whitespace would be an empty message; skip it.
        if chunk.strip():
            await message.channel.send(chunk)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  if __name__ == "__main__":
79
+ import subprocess
80
+ subprocess.Popen(["python", "web.py"])
81
+ bot.run(DISCORD_TOKEN)