Corvius committed
Commit f99e888
Parent(s): 103d0d2

we are so unback...

Files changed (1): app.py (+60, -101)
app.py CHANGED
@@ -5,37 +5,34 @@ import os
 import datetime
 from requests.exceptions import RequestException
 
-# gorillion totally mine OR keys go here :3
-api_keys_env = os.environ.get('API_KEYS')
-if api_keys_env:
-    API_KEYS = [key.strip() for key in api_keys_env.strip().split('\n') if key.strip()]
-else:
-    raise ValueError("all keez ded go kys") # this will never happen due to the superior OR key acquisition method, but juuuust in case (if it actually happens it is joever for joEver.)
-
 API_URL = os.environ.get('API_URL')
+API_KEY = os.environ.get('API_KEY')
+
+headers = {
+    "Authorization": f"Bearer {API_KEY}",
+    "Content-Type": "application/json",
+    'Referer': os.environ.get('REFERRER_URL')
+}
+
+# debug switches
+USER_LOGGING_ENABLED = False
+RESPONSE_LOGGING_ENABLED = True
 
 DEFAULT_PARAMS = {
-    "temperature": 1.0,
-    "top_p": 1,
-    "top_k": 0,
-    "min_p": 0,
-    "top_a": 0.1,
+    "temperature": 0.8,
+    "top_p": 0.95,
+    "top_k": 40,
     "frequency_penalty": 0,
     "presence_penalty": 0,
     "repetition_penalty": 1.1,
     "max_tokens": 512
 }
 
-# debug switches
-USER_LOGGING_ENABLED = False
-RESPONSE_LOGGING_ENABLED = True
-
 def get_timestamp():
     return datetime.datetime.now().strftime("%H:%M:%S")
 
-def predict(message, history, system_prompt, temperature, top_p, top_k, min_p, top_a,
-            frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
-    history_format = [{"role": "system", "content": system_prompt}] if system_prompt else []
+def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+    history_format = [{"role": "system", "content": system_prompt}]
     for human, assistant in history:
         history_format.append({"role": "user", "content": human})
         if assistant:
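Note on the hunk above: the `API_KEYS` pool (and the key rotation it fed) is replaced by a single `API_KEY`, a module-level `headers` dict, and an optional `Referer` read from `REFERRER_URL`; the debug switches merely move above `DEFAULT_PARAMS`, and `predict()` now always seeds the history with a system message, even an empty one. A minimal sketch of preparing those environment variables for a local run — all values below are placeholders, not real endpoints or credentials:

    # Hypothetical local setup; values are placeholders, not real credentials.
    import os

    os.environ.setdefault('API_URL', 'https://example-provider.test/v1/chat/completions')
    os.environ.setdefault('API_KEY', 'sk-placeholder')
    os.environ.setdefault('REFERRER_URL', 'https://example.test/space')

    # app.py reads these at import time, so set them before importing it.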
@@ -50,8 +47,6 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, min_p, t
         "temperature": temperature,
         "top_p": top_p,
         "top_k": top_k,
-        "min_p": min_p,
-        "top_a": top_a,
         "frequency_penalty": frequency_penalty,
         "presence_penalty": presence_penalty,
         "repetition_penalty": repetition_penalty,
@@ -65,76 +60,48 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, min_p, t
         print(f"{param}={value}")
 
     data = {
-        "model": "meta-llama/llama-3.1-405b-instruct:free",
+        "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
         "messages": history_format,
         "stream": True,
         "temperature": temperature,
         "top_p": top_p,
         "top_k": top_k,
-        "min_p": min_p,
-        "top_a": top_a,
         "frequency_penalty": frequency_penalty,
         "presence_penalty": presence_penalty,
         "repetition_penalty": repetition_penalty,
         "max_tokens": max_tokens
     }
 
-    api_key_index = 0
-    retries = 0
-    max_retries = len(API_KEYS)
-
-    while retries < max_retries:
-        current_api_key = API_KEYS[api_key_index]
-
-        headers = {
-            "Authorization": f"Bearer {current_api_key}",
-            "Content-Type": "application/json"
-        }
-
-        try:
-            with requests.post(API_URL, headers=headers, data=json.dumps(data), stream=True) as response:
-                if response.status_code == 200:
-                    partial_message = ""
-                    for line in response.iter_lines():
-                        if stop_flag[0]:
-                            response.close()
-                            break
-                        if line:
-                            line = line.decode('utf-8')
-                            if RESPONSE_LOGGING_ENABLED:
-                                print(f"API Response: {line}")
-                            if line.startswith("data: "):
-                                if line.strip() == "data: [DONE]":
-                                    break
-                                try:
-                                    json_data = json.loads(line[6:])
-                                    if 'choices' in json_data and json_data['choices']:
-                                        delta = json_data['choices'][0]['delta']
-                                        content = delta.get('content', '')
-                                        if content:
-                                            partial_message += content
-                                            yield partial_message
-                                except json.JSONDecodeError:
-                                    continue
-                    if partial_message:
-                        yield partial_message
-                    # successful response, break out of retry loop
-                    break
-                elif response.status_code == 429:
-                    print("rate limit hit, cycling keys...")
-                    retries += 1
-                    api_key_index = (api_key_index + 1) % len(API_KEYS)
-                    continue
-                else:
-                    # theothershits
-                    error_message = f"Error: Received status code {response.status_code} - {response.text}"
-                    print(error_message)
-                    yield f"An error occurred: {error_message}"
-                    break
-        except RequestException as e:
-            print(f"Request error: {e}")
-            yield f"An error occurred: {str(e)}"
-            break
+    try:
+        with requests.post(API_URL, headers=headers, data=json.dumps(data), stream=True) as response:
+            partial_message = ""
+            for line in response.iter_lines():
+                if stop_flag[0]:
+                    response.close()
+                    break
+                if line:
+                    line = line.decode('utf-8')
+                    if RESPONSE_LOGGING_ENABLED:
+                        print(f"API Response: {line}")
+                    if line.startswith("data: "):
+                        if line.strip() == "data: [DONE]":
+                            break
+                        try:
+                            json_data = json.loads(line[6:])
+                            if 'choices' in json_data and json_data['choices']:
+                                content = json_data['choices'][0]['delta'].get('content', '')
+                                if content:
+                                    partial_message += content
+                                    yield partial_message
+                        except json.JSONDecodeError:
+                            continue
+
+            if partial_message:
+                yield partial_message
+
+    except RequestException as e:
+        print(f"Request error: {e}")
+        yield f"An error occurred: {str(e)}"
 
 def import_chat(custom_format_string):
     try:
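This hunk does the heavy lifting: `min_p` and `top_a` are trimmed from both parameter dicts (presumably the new backend does not accept them — an inference, the commit message does not say), the model moves from the free 405B route to `meta-llama/Meta-Llama-3.1-70B-Instruct`, and the 429-driven key-cycling `while` loop collapses into a single `try`/`except` around one streamed POST. With one key there is no rotation fallback left, so a throttled request now simply fails; the new block also no longer checks `response.status_code`, so a non-200 body just parses as no `data:` lines and yields nothing. If rate limits turn out to matter, a single-key backoff could be reintroduced along these lines — a hedged sketch assuming the provider still signals throttling with HTTP 429, not code from this commit:

    import time
    import requests

    def post_with_backoff(url, headers, payload, attempts=3):
        # Hypothetical helper: retry the same key with exponential backoff on 429.
        for attempt in range(attempts):
            response = requests.post(url, headers=headers, json=payload, stream=True)
            if response.status_code != 429:
                return response
            response.close()
            time.sleep(2 ** attempt)  # 1s, 2s, 4s between attempts
        return response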
@@ -180,7 +147,7 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
     with gr.Row():
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(value=[])
-            msg = gr.Textbox(label="Message")
+            msg = gr.Textbox(label="Message (70B for now. The provider might bug out at random. The space may restart frequently)")
             with gr.Row():
                 clear = gr.Button("Clear")
                 regenerate = gr.Button("Regenerate")
@@ -194,29 +161,25 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
 
         with gr.Column(scale=1):
             system_prompt = gr.Textbox("", label="System Prompt", lines=5)
-            temperature = gr.Slider(0, 2, value=DEFAULT_PARAMS["temperature"], step=0.01, label="Temperature")
-            top_p = gr.Slider(0, 1, value=DEFAULT_PARAMS["top_p"], step=0.01, label="Top P")
-            top_k = gr.Slider(0, 500, value=DEFAULT_PARAMS["top_k"], step=1, label="Top K")
-            min_p = gr.Slider(0, 1, value=DEFAULT_PARAMS["min_p"], step=0.01, label="Min P")
-            top_a = gr.Slider(0, 1, value=DEFAULT_PARAMS["top_a"], step=0.01, label="Top A")
-            frequency_penalty = gr.Slider(-2, 2, value=DEFAULT_PARAMS["frequency_penalty"], step=0.1, label="Frequency Penalty")
-            presence_penalty = gr.Slider(-2, 2, value=DEFAULT_PARAMS["presence_penalty"], step=0.1, label="Presence Penalty")
-            repetition_penalty = gr.Slider(0.01, 5, value=DEFAULT_PARAMS["repetition_penalty"], step=0.01, label="Repetition Penalty")
-            max_tokens = gr.Slider(1, 4096, value=DEFAULT_PARAMS["max_tokens"], step=1, label="Max Output (max_tokens)")
+            temperature = gr.Slider(0, 2, value=0.8, step=0.01, label="Temperature")
+            top_p = gr.Slider(0, 1, value=0.95, step=0.01, label="Top P")
+            top_k = gr.Slider(1, 500, value=40, step=1, label="Top K")
+            frequency_penalty = gr.Slider(-2, 2, value=0, step=0.1, label="Frequency Penalty")
+            presence_penalty = gr.Slider(-2, 2, value=0, step=0.1, label="Presence Penalty")
+            repetition_penalty = gr.Slider(0.01, 5, value=1.1, step=0.01, label="Repetition Penalty")
+            max_tokens = gr.Slider(1, 4096, value=512, step=1, label="Max Output (max_tokens)")
 
     def user(user_message, history):
         history = history or []
         return "", history + [[user_message, None]]
 
-    def bot(history, system_prompt, temperature, top_p, top_k, min_p, top_a,
-            frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+    def bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
         stop_flag[0] = False
         history = history or []
         if not history:
             return history
         user_message = history[-1][0]
-        bot_message = predict(user_message, history[:-1], system_prompt, temperature, top_p, top_k, min_p, top_a,
-                              frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag)
+        bot_message = predict(user_message, history[:-1], system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag)
         history[-1][1] = ""
         for chunk in bot_message:
             if stop_flag[0]:
@@ -225,13 +188,11 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
             history[-1][1] = chunk
             yield history
 
-    def regenerate_response(history, system_prompt, temperature, top_p, top_k, min_p, top_a,
-                            frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+    def regenerate_response(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
         if history and len(history) > 0:
             last_user_message = history[-1][0]
             history[-1][1] = None
-            for new_history in bot(history, system_prompt, temperature, top_p, top_k, min_p, top_a,
-                                   frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+            for new_history in bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
                 yield new_history
         else:
             yield []
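The sliders now hardcode the same numbers that live in `DEFAULT_PARAMS` (0.8, 0.95, 40, and so on), where the old code read them from the dict; that leaves two copies of each default that can drift apart. If that is a concern, the old binding still works with the new values — a sketch of the first three sliders, same widgets, values pulled from the dict:

    # Sketch: keep DEFAULT_PARAMS as the single source of truth for slider defaults.
    temperature = gr.Slider(0, 2, value=DEFAULT_PARAMS["temperature"], step=0.01, label="Temperature")
    top_p = gr.Slider(0, 1, value=DEFAULT_PARAMS["top_p"], step=0.01, label="Top P")
    top_k = gr.Slider(1, 500, value=DEFAULT_PARAMS["top_k"], step=1, label="Top K")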
@@ -241,16 +202,14 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
         return imported_history, imported_system_prompt
 
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot, [chatbot, system_prompt, temperature, top_p, top_k, min_p, top_a,
-              frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag], chatbot
+        bot, [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag], chatbot
     )
 
     clear.click(lambda: None, None, chatbot, queue=False)
 
     regenerate.click(
         regenerate_response,
-        [chatbot, system_prompt, temperature, top_p, top_k, min_p, top_a,
-         frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag],
+        [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag],
         chatbot
     )
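`stop_flag` is threaded through `bot`, `regenerate_response`, and both event chains, but no Stop control appears in these hunks. Assuming it is a `gr.State` holding a one-element list (which is how `stop_flag[0] = False` in `bot` reads), the wiring elsewhere in the file presumably looks something like this — hypothetical, not taken from the commit:

    # Hypothetical Stop wiring, consistent with stop_flag being gr.State([False]).
    stop = gr.Button("Stop")

    def stop_generation(flag):
        flag[0] = True  # predict() checks this between streamed SSE lines
        return flag

    stop.click(stop_generation, stop_flag, stop_flag, queue=False)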
 
 