baruga committed on
Commit
4fcdc53
1 Parent(s): 3c87ea6

Add token streaming

Browse files
Files changed (3) hide show
  1. .gitignore +4 -1
  2. app.py +56 -66
  3. requirements.txt +2 -1
.gitignore CHANGED
@@ -1 +1,4 @@
1
- .env
 
 
 
 
1
+ .env
2
+ prompts.txt
3
+ test.py
4
+ app_backup.py
app.py CHANGED
@@ -2,11 +2,13 @@ import gradio as gr
2
  import openai
3
  import os
4
  import requests
 
 
5
 
6
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
7
  ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD")
8
 
9
- openai.api_key = OPENAI_API_KEY
10
 
11
  default_system_message = {"role": "system", "content": "You are a brilliant, helpful assistant, always providing answers to the best of your knowledge. If you are unsure of the answer, you indicate it to the user. Currently, you don't have access to the internet."}
12
  personalities = {
@@ -26,40 +28,35 @@ def get_completion(model, personality, user_message, message_history, chatlog_hi
26
  updated_message_history[0] = system_message
27
  new_history_row = {"role": "user", "content": user_message}
28
  updated_message_history = updated_message_history + [new_history_row]
29
- headers = {
30
- "Content-Type": "application/json",
31
- "Authorization": f"Bearer {openai.api_key}",
32
- }
33
- payload = {
34
- "model":model,
35
- "messages":updated_message_history,
36
- "temperature":temperature,
37
- "max_tokens":maximum_length,
38
- "top_p":top_p,
39
- "frequency_penalty":frequency_penalty,
40
- "presence_penalty":presence_penalty,
41
- }
42
- completion = requests.post(
43
- "https://api.openai.com/v1/chat/completions",
44
- headers=headers,
45
- json=payload,
46
  )
47
- completion = completion.json()
48
- # completion = openai.ChatCompletion.create(
49
- # model=model,
50
- # messages=updated_message_history,
51
- # temperature=temperature,
52
- # max_tokens=maximum_length,
53
- # top_p=top_p,
54
- # frequency_penalty=frequency_penalty,
55
- # presence_penalty=presence_penalty,
56
- # )
57
- assistant_message = completion["choices"][0]["message"]["content"]
58
- new_history_row = {"role": "assistant", "content": assistant_message}
59
  updated_message_history = updated_message_history + [new_history_row]
60
- updated_chatlog_history = chatlog_history + [(user_message, assistant_message)]
61
- token_count = completion["usage"]["total_tokens"]
62
- return "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  def retry_completion(model, personality, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty):
65
  # set personality
@@ -72,40 +69,33 @@ def retry_completion(model, personality, message_history, chatlog_history, tempe
72
  updated_chatlog_history = chatlog_history[:-1]
73
  # delete latest assistant message from message_history
74
  updated_message_history = updated_message_history[:-1]
75
- headers = {
76
- "Content-Type": "application/json",
77
- "Authorization": f"Bearer {openai.api_key}",
78
- }
79
- payload = {
80
- "model":model,
81
- "messages":updated_message_history,
82
- "temperature":temperature,
83
- "max_tokens":maximum_length,
84
- "top_p":top_p,
85
- "frequency_penalty":frequency_penalty,
86
- "presence_penalty":presence_penalty,
87
- }
88
- completion = requests.post(
89
- "https://api.openai.com/v1/chat/completions",
90
- headers=headers,
91
- json=payload,
92
  )
93
- completion = completion.json()
94
- # completion = openai.ChatCompletion.create(
95
- # model=model,
96
- # messages=updated_message_history,
97
- # temperature=temperature,
98
- # max_tokens=maximum_length,
99
- # top_p=top_p,
100
- # frequency_penalty=frequency_penalty,
101
- # presence_penalty=presence_penalty,
102
- # )
103
- assistant_message = completion["choices"][0]["message"]["content"]
104
- new_history_row = {"role": "assistant", "content": assistant_message}
105
  updated_message_history = updated_message_history + [new_history_row]
106
- updated_chatlog_history = updated_chatlog_history + [(user_message, assistant_message)]
107
- token_count = completion["usage"]["total_tokens"]
108
- return "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  def reset_chat():
111
  return "", [default_system_message], [], [], 0
@@ -140,4 +130,4 @@ with gr.Blocks(theme=theme) as app:
140
  retry_button.click(retry_completion, inputs=[model, personality, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty], outputs=[user_message, message_history, chatlog_history, chatbot, token_count])
141
  reset_button.click(reset_chat, inputs=[], outputs=[user_message, message_history, chatlog_history, chatbot, token_count])
142
 
143
- app.launch(auth=("admin", ADMIN_PASSWORD))
 
2
  import openai
3
  import os
4
  import requests
5
+ from transformers import GPT2TokenizerFast
6
+
7
 
8
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
9
  ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD")
10
 
11
+ tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
12
 
13
  default_system_message = {"role": "system", "content": "You are a brilliant, helpful assistant, always providing answers to the best of your knowledge. If you are unsure of the answer, you indicate it to the user. Currently, you don't have access to the internet."}
14
  personalities = {
 
28
  updated_message_history[0] = system_message
29
  new_history_row = {"role": "user", "content": user_message}
30
  updated_message_history = updated_message_history + [new_history_row]
31
+ response = openai.ChatCompletion.create(
32
+ model=model,
33
+ messages=updated_message_history,
34
+ temperature=temperature,
35
+ max_tokens=maximum_length,
36
+ top_p=top_p,
37
+ frequency_penalty=frequency_penalty,
38
+ presence_penalty=presence_penalty,
39
+ stream=True,
 
 
 
 
 
 
 
 
40
  )
41
+ new_history_row = {"role": "assistant", "content": ""}
 
 
 
 
 
 
 
 
 
 
 
42
  updated_message_history = updated_message_history + [new_history_row]
43
+ updated_chatlog_history = chatlog_history + [[user_message, ""]]
44
+ # create variables to collect the stream of chunks
45
+ collected_chunks = []
46
+ collected_messages = []
47
+ # iterate through the stream of events
48
+ for chunk in response:
49
+ collected_chunks.append(chunk) # save the event response
50
+ chunk_message = chunk['choices'][0]['delta'] # extract the message
51
+ collected_messages.append(chunk_message) # save the message
52
+ assistant_message = ''.join([m.get('content', '') for m in collected_messages])
53
+ updated_message_history[-1]["content"] = assistant_message
54
+ updated_chatlog_history[-1][1] = assistant_message
55
+ full_prompt = '\n'.join([row[0] + row[1] for row in updated_chatlog_history])
56
+ token_count = len(tokenizer(full_prompt)["input_ids"])#completion["usage"]["total_tokens"]
57
+ yield "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
58
+ # assistant_message = completion["choices"][0]["message"]["content"]
59
+ # return "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
60
 
61
  def retry_completion(model, personality, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty):
62
  # set personality
 
69
  updated_chatlog_history = chatlog_history[:-1]
70
  # delete latest assistant message from message_history
71
  updated_message_history = updated_message_history[:-1]
72
+ response = openai.ChatCompletion.create(
73
+ model=model,
74
+ messages=updated_message_history,
75
+ temperature=temperature,
76
+ max_tokens=maximum_length,
77
+ top_p=top_p,
78
+ frequency_penalty=frequency_penalty,
79
+ presence_penalty=presence_penalty,
80
+ stream=True,
 
 
 
 
 
 
 
 
81
  )
82
+ new_history_row = {"role": "assistant", "content": ""}
 
 
 
 
 
 
 
 
 
 
 
83
  updated_message_history = updated_message_history + [new_history_row]
84
+ updated_chatlog_history = updated_chatlog_history + [[user_message, ""]]
85
+ # create variables to collect the stream of chunks
86
+ collected_chunks = []
87
+ collected_messages = []
88
+ # iterate through the stream of events
89
+ for chunk in response:
90
+ collected_chunks.append(chunk) # save the event response
91
+ chunk_message = chunk['choices'][0]['delta'] # extract the message
92
+ collected_messages.append(chunk_message) # save the message
93
+ assistant_message = ''.join([m.get('content', '') for m in collected_messages])
94
+ updated_message_history[-1]["content"] = assistant_message
95
+ updated_chatlog_history[-1][1] = assistant_message
96
+ full_prompt = '\n'.join([row[0] + row[1] for row in updated_chatlog_history])
97
+ token_count = len(tokenizer(full_prompt)["input_ids"])#completion["usage"]["total_tokens"]
98
+ yield "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
99
 
100
  def reset_chat():
101
  return "", [default_system_message], [], [], 0
 
130
  retry_button.click(retry_completion, inputs=[model, personality, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty], outputs=[user_message, message_history, chatlog_history, chatbot, token_count])
131
  reset_button.click(reset_chat, inputs=[], outputs=[user_message, message_history, chatlog_history, chatbot, token_count])
132
 
133
+ app.launch(auth=("admin", ADMIN_PASSWORD), enable_queue=True)
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  gradio
2
- openai
 
 
1
  gradio
2
+ openai
3
+ transformers