Spaces:
Running
Running
File size: 10,422 Bytes
c8c8070 558e9ac 376bf58 c8c8070 afc6515 4610ea3 167f660 44a7379 3a30fdd 8e7ba9f a5cb6b2 bc71765 a24d25f b15c3f8 558e9ac 4610ea3 558e9ac f0321ce 34ab564 f092d7a 376bf58 c8c8070 72553ca c8c8070 c0bd714 9ee185a c8c8070 72553ca c0bd714 9ee185a c8c8070 bca8716 c8c8070 34ab564 72553ca 34ab564 72553ca c8c8070 72553ca c8c8070 72553ca c8c8070 72553ca 4610ea3 72553ca f6626f6 72553ca 520fa8d 72553ca 376bf58 520fa8d bca8716 c8c8070 72553ca c8c8070 a9bbd5a 8e7ba9f bc71765 c8c8070 bc71765 c8c8070 f427a7b 72553ca 56d3094 c8c8070 c11648d c8c8070 56d3094 b79ac92 56d3094 f092d7a 56d3094 ba091cb 1adddf7 ba091cb 7ea05a6 b79ac92 c8c8070 72553ca ce5f563 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 |
import gradio as gr
import os
import sys
import json
import copy
import requests
import random
import timeout_decorator
from tenacity import retry, wait_fixed, stop_after_attempt
# Model identifier sent in every chat-completion payload.
MODEL = "o1-preview"
# Endpoint that receives the chat-completion POST requests.
API_URL = os.getenv("API_URL")
# The app is treated as switched off only when DISABLED is exactly the string "True".
DISABLED = os.getenv("DISABLED") == 'True'
# Comma-separated pool of API keys. Blank entries and surrounding whitespace
# are dropped, and an unset variable yields an empty pool instead of crashing
# on ``None.split``.
OPENAI_API_KEYS = [
    key.strip()
    for key in os.getenv("OPENAI_API_KEYS", "").split(',')
    if key.strip()
]
print (API_URL)
# Never write the keys themselves to the logs; report only how many were loaded.
print (f"Loaded {len(OPENAI_API_KEYS)} OpenAI API key(s)")
# Concurrency limit handed to the Gradio queue; falls back to 1 when unset.
NUM_THREADS = int(os.getenv("NUM_THREADS", "1"))
print (NUM_THREADS)
@retry(stop=stop_after_attempt(5), wait=wait_fixed(2))
@timeout_decorator.timeout(120)
def call_openai_api(payload, headers):
    """POST ``payload`` as JSON to ``API_URL`` and return the streaming response.

    The call is wrapped in a 120-second ``timeout_decorator`` limit, and the
    whole thing is retried by ``tenacity`` for at most 5 attempts with a fixed
    2-second wait between them.

    Args:
        payload: JSON-serialisable request body.
        headers: HTTP headers to send with the request.

    Returns:
        The ``requests`` response object, opened with ``stream=True``.

    Raises:
        requests.HTTPError: via ``raise_for_status`` on an error status
            (subject to the retry policy above).
    """
    request_kwargs = {"headers": headers, "json": payload, "stream": True}
    api_response = requests.post(API_URL, **request_kwargs)
    # Turn 4xx/5xx answers into exceptions so the retry decorator sees them.
    api_response.raise_for_status()
    return api_response
def exception_handler(exception_type, exception, traceback):
    """Print an uncaught exception as ``TypeName: message``.

    Args:
        exception_type: Class of the exception being reported.
        exception: The exception instance.
        traceback: Traceback object; accepted for the hook signature, unused.
    """
    summary = ": ".join((exception_type.__name__, str(exception)))
    print(summary)


# Suppress traceback frames and route uncaught exceptions through the
# one-line handler above.
sys.tracebacklimit = 0
sys.excepthook = exception_handler
def _pair_history(turns):
    """Group a flat ``[user, assistant, user, ...]`` list into ``(user, assistant)`` tuples.

    A trailing entry that has no partner yet is left out.
    """
    return [(turns[i], turns[i + 1]) for i in range(0, len(turns) - 1, 2)]


def _build_messages(history, inputs):
    """Turn the flat history plus the new user input into chat-completion messages."""
    # Even positions are user turns, odd positions are assistant turns.
    messages = [
        {"role": "user" if i % 2 == 0 else "assistant", "content": data}
        for i, data in enumerate(history)
    ]
    messages.append({"role": "user", "content": inputs})
    return messages


def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request:gr.Request):
    """Stream a chat completion for ``inputs`` and yield incremental UI updates.

    Args:
        inputs: Text the user just submitted.
        top_p: Nucleus-sampling value; only sent when ``chat_counter`` is non-zero.
        temperature: Sampling temperature; only sent when ``chat_counter`` is non-zero.
        chat_counter: Turn counter; when it is 0 only ``inputs`` is sent, with
            fixed sampling values of 1.0.
        chatbot: Current chatbot value; accepted for the event signature, not read.
        history: Flat list alternating user and assistant texts. Mutated in place.
        request: Incoming Gradio request; its raw headers are forwarded upstream
            as a stringified dict in a ``Headers`` header.

    Yields:
        6-tuples ``(chat_pairs, history, chat_counter, status, inputs_update,
        button_update)``. While streaming, both updates set
        ``interactive=False``; the last tuple sets ``interactive=True``.
        ``status`` is the response object, or ``'Error! Please try again'``
        when anything fails, in which case the pre-call history and counter
        are yielded so the UI is rolled back and unlocked.
    """
    # Snapshot the state so a failed call can be rolled back.
    orig_history = copy.deepcopy(history)
    orig_chat_counter = chat_counter

    if chat_counter != 0:
        messages = _build_messages(history, inputs)
        sampling_temperature, sampling_top_p = temperature, top_p
    else:
        # First turn: only the new input is sent, with fixed sampling values.
        messages = [{"role": "user", "content": f"{inputs}"}]
        sampling_temperature, sampling_top_p = 1.0, 1.0

    payload = {
        "model": MODEL,
        "messages": messages,
        "temperature": sampling_temperature,
        "top_p": sampling_top_p,
        "n": 1,
        "stream": True,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }

    chat_counter += 1
    history.append(inputs)
    token_counter = 0
    partial_words = ""
    counter = 0

    try:
        # Pick one key from the pool per request. The key is deliberately not
        # printed: it is a secret and must not end up in the logs.
        api_key = random.choice(OPENAI_API_KEYS)
        headers_dict = {key.decode('utf-8'): value.decode('utf-8') for key, value in request.headers.raw}
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
            "Headers": f"{headers_dict}"
        }

        # POST with stream=True (see call_openai_api) and consume the body line by line.
        response = call_openai_api(payload, headers)
        for chunk in response.iter_lines():
            # Skipping first chunk
            if counter == 0:
                counter += 1
                continue
            # Response data arrives as bytes; decode once per line.
            line = chunk.decode()
            # Short lines are ignored (presumably blanks and the
            # ``data: [DONE]`` terminator -- confirm against the upstream format).
            if len(line) <= 12:
                continue
            # Drop the first 6 characters (presumably the ``data: `` prefix)
            # and parse the remainder once.
            delta = json.loads(line[6:])['choices'][0]['delta']
            if "content" not in delta:
                continue
            partial_words = partial_words + delta["content"]
            if token_counter == 0:
                history.append(" " + partial_words)
            else:
                history[-1] = partial_words
            token_counter += 1
            yield _pair_history(history), history, chat_counter, response, gr.update(interactive=False), gr.update(interactive=False)  # resembles {chatbot: chat, state: history}
    except Exception as e:
        print (f'error found: {e}')
        # This is a generator: a ``return <value>`` would be discarded and the
        # UI would stay locked. Yield the rolled-back state, then stop.
        yield _pair_history(orig_history), orig_history, orig_chat_counter, 'Error! Please try again', gr.update(interactive=True), gr.update(interactive=True)
        return

    # Final update re-enables the textbox and the button.
    yield _pair_history(history), history, chat_counter, response, gr.update(interactive=True), gr.update(interactive=True)
    print(json.dumps({"chat_counter": chat_counter, "payload": payload, "partial_words": partial_words, "token_counter": token_counter, "counter": counter}))
def reset_textbox():
    """Return two Gradio updates: clear-and-disable, then disable.

    The first update sets ``value=''`` and ``interactive=False``; the second
    only sets ``interactive=False``.
    """
    cleared_and_locked = gr.update(value='', interactive=False)
    locked = gr.update(interactive=False)
    return cleared_and_locked, locked
# Page heading; swapped for a red notice when DISABLED is set.
title = """<h1 align="center">OpenAI-O1-Preview: Research Preview (Short-Term Availability)</h1>"""
if DISABLED:
    title = """<h1 align="center" style="color:red">This app has reached OpenAI's usage limit. Please check back tomorrow.</h1>"""
# Introductory blurb. The closing sentence names o1-preview so that it agrees
# with the page heading (it previously referred to gpt-4 turbo).
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
```
User: <utterance>
Assistant: <utterance>
User: <utterance>
Assistant: <utterance>
...
```
In this app, you can explore the outputs of an o1-preview LLM.
"""
# NOTE(review): indentation was missing from the captured source; the nesting
# below is reconstructed -- confirm it against the deployed layout.
# Default Gradio theme with a green primary hue.
theme = gr.themes.Default(primary_hue="green")
# Assemble the page: heading HTML, the initially hidden chat column, the
# consent column, and the event wiring between them.
with gr.Blocks(css = """#col_container { margin-left: auto; margin-right: auto;}
#chatbot {height: 520px; overflow: auto;}""",
               theme=theme) as demo:
    gr.HTML(title)
    gr.HTML("""<h3 align="center" style="color: red;">If this app doesn't respond, consider trying our O1-mini app:<br/><a href="https://huggingface.co/spaces/yuntian-deng/o1mini">https://huggingface.co/spaces/yuntian-deng/o1mini</a></h3>""")
    #gr.HTML('''<center><a href="https://huggingface.co/spaces/ysharma/ChatGPT4?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space and run securely with your OpenAI API Key</center>''')
    # Main chat column; created with visible=False and revealed by enable_inputs.
    with gr.Column(elem_id = "col_container", visible=False) as main_block:
        #GPT4 API Key is provided by Huggingface
        #openai_api_key = gr.Textbox(type='password', label="Enter only your GPT4 OpenAI API key here")
        chatbot = gr.Chatbot(elem_id='chatbot') #c
        inputs = gr.Textbox(placeholder= "Hi there!", label= "Type an input and press Enter") #t
        # Flat conversation history passed to and returned from predict.
        state = gr.State([]) #s
        with gr.Row():
            with gr.Column(scale=7):
                # Submit button; not shown when DISABLED is set.
                b1 = gr.Button(visible=not DISABLED) #.style(full_width=True)
            with gr.Column(scale=3):
                # Receives the fourth value yielded by predict.
                server_status_code = gr.Textbox(label="Status code from OpenAI server", )
        #inputs, top_p, temperature, top_k, repetition_penalty
        with gr.Accordion("Parameters", open=False):
            top_p = gr.Slider( minimum=-0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)",)
            temperature = gr.Slider( minimum=-0, maximum=5.0, value=1.0, step=0.1, interactive=True, label="Temperature",)
            #top_k = gr.Slider( minimum=1, maximum=50, value=4, step=1, interactive=True, label="Top-k",)
            #repetition_penalty = gr.Slider( minimum=0.1, maximum=3.0, value=1.03, step=0.01, interactive=True, label="Repetition Penalty", )
            # Hidden turn counter that predict reads and returns.
            chat_counter = gr.Number(value=0, visible=False, precision=0)
    # Consent column; visible on load and hidden by enable_inputs.
    with gr.Column(elem_id = "user_consent_container") as user_consent_block:
        # Get user consent
        # Hidden checkbox that receives the result of the JS confirm() dialog.
        accept_checkbox = gr.Checkbox(visible=False)
        js = "(x) => confirm('By clicking \"OK\", I agree that my data may be published or shared.')"
        with gr.Accordion("User Consent for Data Collection, Use, and Sharing", open=True):
            gr.HTML("""
<div>
<p>By using our app, which is powered by OpenAI's API, you acknowledge and agree to the following terms regarding the data you provide:</p>
<ol>
<li><strong>Collection:</strong> We may collect information, including the inputs you type into our app, the outputs generated by OpenAI's API, and certain technical details about your device and connection (such as browser type, operating system, and IP address) provided by your device's request headers.</li>
<li><strong>Use:</strong> We may use the collected data for research purposes, to improve our services, and to develop new products or services, including commercial applications, and for security purposes, such as protecting against unauthorized access and attacks.</li>
<li><strong>Sharing and Publication:</strong> Your data, including the technical details collected from your device's request headers, may be published, shared with third parties, or used for analysis and reporting purposes.</li>
<li><strong>Data Retention:</strong> We may retain your data, including the technical details collected from your device's request headers, for as long as necessary.</li>
</ol>
<p>By continuing to use our app, you provide your explicit consent to the collection, use, and potential sharing of your data as described above. If you do not agree with our data collection, use, and sharing practices, please do not use our app.</p>
</div>
""")
            accept_button = gr.Button("I Agree")

    def enable_inputs():
        """Return updates that hide the first output and show the second.

        Wired below to ``[user_consent_block, main_block]``.
        """
        return gr.update(visible=False), gr.update(visible=True)

    # "I Agree" runs only the JS confirm(); its result is written to the hidden
    # checkbox, whose change event then swaps the consent and chat columns.
    accept_button.click(None, None, accept_checkbox, js=js, queue=False)
    accept_checkbox.change(fn=enable_inputs, inputs=[], outputs=[user_consent_block, main_block], queue=False)
    # Both Enter and the button register two listeners: reset_textbox
    # (unqueued) and predict.
    inputs.submit(reset_textbox, [], [inputs, b1], queue=False)
    inputs.submit(predict, [inputs, top_p, temperature, chat_counter, chatbot, state], [chatbot, state, chat_counter, server_status_code, inputs, b1],) #openai_api_key
    b1.click(reset_textbox, [], [inputs, b1], queue=False)
    b1.click(predict, [inputs, top_p, temperature, chat_counter, chatbot, state], [chatbot, state, chat_counter, server_status_code, inputs, b1],) #openai_api_key
# Start the app with a queue of at most 10 waiting requests and NUM_THREADS as
# the default concurrency limit.
demo.queue(max_size=10, default_concurrency_limit=NUM_THREADS, api_open=False).launch(share=False)