File size: 10,422 Bytes
c8c8070
558e9ac
 
376bf58
 
c8c8070
afc6515
4610ea3
 
167f660
44a7379
3a30fdd
8e7ba9f
a5cb6b2
bc71765
a24d25f
b15c3f8
 
 
558e9ac
4610ea3
 
 
 
 
 
 
558e9ac
 
 
f0321ce
34ab564
f092d7a
376bf58
 
c8c8070
72553ca
 
 
 
 
 
 
 
c8c8070
c0bd714
 
9ee185a
c8c8070
72553ca
c0bd714
9ee185a
c8c8070
 
bca8716
c8c8070
34ab564
 
72553ca
 
 
 
 
 
 
 
34ab564
72553ca
 
 
 
c8c8070
72553ca
 
 
 
 
 
 
 
c8c8070
 
72553ca
c8c8070
 
 
 
72553ca
 
 
 
4610ea3
72553ca
 
 
 
 
 
f6626f6
 
72553ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520fa8d
72553ca
 
376bf58
 
 
520fa8d
bca8716
c8c8070
 
 
72553ca
c8c8070
a9bbd5a
8e7ba9f
bc71765
c8c8070
 
 
 
 
 
 
 
bc71765
c8c8070
 
 
 
 
 
 
 
f427a7b
72553ca
 
56d3094
c8c8070
 
 
 
 
 
 
c11648d
c8c8070
 
 
 
 
 
 
 
 
 
56d3094
 
 
b79ac92
 
56d3094
 
 
 
 
f092d7a
 
 
 
56d3094
 
 
 
 
ba091cb
 
1adddf7
ba091cb
7ea05a6
b79ac92
c8c8070
72553ca
 
 
 
 
ce5f563
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import gradio as gr
import os
import sys
import json
import copy
import requests
import random
import timeout_decorator
from tenacity import retry, wait_fixed, stop_after_attempt

# Model name placed in the "model" field of every request payload.
MODEL = "o1-preview"
# Endpoint the payloads are POSTed to; stays None when API_URL is unset.
API_URL = os.getenv("API_URL")
# Only the exact string 'True' switches the app into its disabled state.
DISABLED = os.getenv("DISABLED") == 'True'
# Comma-separated pool of API keys. Surrounding whitespace and empty entries
# are dropped, and a missing variable yields an empty list instead of an
# AttributeError on None at import time.
OPENAI_API_KEYS = [
    key.strip()
    for key in os.getenv("OPENAI_API_KEYS", "").split(',')
    if key.strip()
]
print(API_URL)
# The keys are credentials: log how many were loaded, never their values.
print(f"{len(OPENAI_API_KEYS)} OpenAI API key(s) loaded")
# Concurrency limit handed to the Gradio queue; falls back to 1 when unset
# instead of failing with int(None).
NUM_THREADS = int(os.getenv("NUM_THREADS", "1"))

print(NUM_THREADS)

@retry(stop=stop_after_attempt(5), wait=wait_fixed(2))
@timeout_decorator.timeout(120)
def call_openai_api(payload, headers):
    """POST ``payload`` as JSON to ``API_URL`` and return the streaming response.

    The call is wrapped in a ``timeout_decorator.timeout(120)`` limit, and the
    outer ``retry`` decorator re-runs it (up to 5 attempts, fixed wait of 2
    between attempts) whenever it raises, including on the HTTP error raised
    by ``raise_for_status`` for a non-success status.

    Args:
        payload: JSON-serialisable request body.
        headers: HTTP headers to send with the request.

    Returns:
        The ``requests`` response object, opened with ``stream=True``.
    """
    reply = requests.post(API_URL, json=payload, headers=headers, stream=True)
    reply.raise_for_status()
    return reply

def exception_handler(exception_type, exception, traceback):
    """Print an exception as a single ``TypeName: message`` line.

    The three parameters follow the ``sys.excepthook`` calling convention;
    ``traceback`` is accepted but never used, so no stack frames are printed.
    """
    summary = f"{exception_type.__name__}: {exception}"
    print(summary)
# Route uncaught exceptions through exception_handler so that only a
# one-line "Type: message" summary is printed.
sys.excepthook = exception_handler
# Per the Python docs, a limit of 0 (or less) suppresses all traceback frames
# wherever the interpreter would otherwise print them.
sys.tracebacklimit = 0
    
def _chat_pairs(history):
    """Group a flat, alternating history list into (user, assistant) tuples.

    Items are paired as (history[0], history[1]), (history[2], history[3]), ...
    A trailing item without a partner (a user message that has no reply yet)
    is left out.
    """
    return list(zip(history[::2], history[1::2]))


def _build_messages(history, inputs):
    """Convert the stored history plus the new user input into chat messages.

    Even positions in ``history`` are labelled 'user' and odd positions
    'assistant'; ``inputs`` is appended as the final 'user' message.
    """
    messages = [
        {"role": "user" if index % 2 == 0 else "assistant", "content": turn}
        for index, turn in enumerate(history)
    ]
    messages.append({"role": "user", "content": inputs})
    return messages


def _delta_content(line):
    """Return the text fragment carried by one decoded stream line, or None.

    Lines of 12 characters or fewer are ignored (a ``data: [DONE]`` terminator
    is exactly 12 characters long). For longer lines the first 6 characters
    are dropped (presumably the ``data: `` prefix) and the rest is parsed as
    JSON exactly once. A chunk with an empty ``choices`` list, or whose delta
    has no or a null ``content``, yields None instead of raising.

    Raises:
        json.JSONDecodeError, KeyError: for a long line that is not a JSON
            object with a ``choices`` entry; the caller treats that as a
            failed request.
    """
    if len(line) <= 12:
        return None
    choices = json.loads(line[6:])['choices']
    if not choices:
        return None
    return choices[0]['delta'].get("content")


def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request:gr.Request):
    """Stream one assistant reply for ``inputs`` and update the chat state.

    This is a generator: each yield is a 6-tuple in the order of the event's
    outputs (chat pairs, history, chat counter, status text, update for the
    input textbox, update for the button). While tokens arrive, the textbox
    and button are kept non-interactive; the final yield re-enables them.

    Args:
        inputs: The user's new message.
        top_p: Nucleus-sampling value; only sent when ``chat_counter`` is
            non-zero (the first turn always sends 1.0).
        temperature: Sampling temperature; same first-turn rule as ``top_p``.
        chat_counter: Number of turns completed so far in this session.
        chatbot: Current chatbot value; not read by this function.
        history: Flat list alternating user and assistant messages. It is
            mutated in place while streaming.
        request: The Gradio request; its raw headers are decoded and forwarded
            as a string in the outgoing ``Headers`` header.

    Yields:
        The 6-tuple described above. If anything fails, a single tuple is
        yielded that restores the pre-call history and counter, shows
        'Error! Please try again', and re-enables the inputs.
    """
    # Snapshot taken before any mutation so a failed call can be rolled back.
    orig_history = copy.deepcopy(history)
    orig_chat_counter = chat_counter

    if chat_counter != 0:
        # Follow-up turn: replay the conversation and honour the sliders.
        messages = _build_messages(history, inputs)
        request_temperature, request_top_p = temperature, top_p
    else:
        # First turn: only the new message, with fixed sampling values.
        messages = [{"role": "user", "content": f"{inputs}"}]
        request_temperature, request_top_p = 1.0, 1.0
    payload = {
        "model": MODEL,
        "messages": messages,
        "temperature": request_temperature,
        "top_p": request_top_p,
        "n": 1,
        "stream": True,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }

    # The key is a credential and is deliberately never printed or logged.
    OPENAI_API_KEY = random.choice(OPENAI_API_KEYS)
    headers_dict = {key.decode('utf-8'): value.decode('utf-8') for key, value in request.headers.raw}
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Headers": f"{headers_dict}"
    }

    chat_counter += 1

    history.append(inputs)
    token_counter = 0
    partial_words = ""
    counter = 0
    response = None

    try:
        response = call_openai_api(payload, headers)
        # Plain string for the status textbox, e.g. "<Response [200]>".
        response_code = f"{response}"

        for raw_line in response.iter_lines():
            # The first line of the stream is always skipped.
            if counter == 0:
                counter += 1
                continue
            line = raw_line.decode()
            if not line:
                continue
            fragment = _delta_content(line)
            if fragment is None:
                continue
            partial_words += fragment
            if token_counter == 0:
                # First fragment opens the assistant turn in the history.
                history.append(" " + partial_words)
            else:
                history[-1] = partial_words
            token_counter += 1
            yield _chat_pairs(history), history, chat_counter, response_code, gr.update(interactive=False), gr.update(interactive=False)
    except Exception as e:
        print(f'error found: {e}')
        # This must be a yield: a value passed to `return` inside a generator
        # is not delivered as an output, which would leave the textbox and
        # button disabled and the state not rolled back.
        yield _chat_pairs(orig_history), orig_history, orig_chat_counter, 'Error! Please try again', gr.update(interactive=True), gr.update(interactive=True)
        return
    finally:
        # Release the streamed connection on success, error, or early close.
        if response is not None:
            response.close()

    yield _chat_pairs(history), history, chat_counter, response_code, gr.update(interactive=True), gr.update(interactive=True)
    print(json.dumps({"chat_counter": chat_counter, "payload": payload, "partial_words": partial_words, "token_counter": token_counter, "counter": counter}))
                   

def reset_textbox():
    """Return two updates: clear and lock the first output, lock the second.

    The first update sets ``value`` to an empty string and ``interactive`` to
    False; the second only sets ``interactive`` to False.
    """
    cleared_and_locked = gr.update(value='', interactive=False)
    locked = gr.update(interactive=False)
    return cleared_and_locked, locked

# Page heading; replaced by a red usage-limit notice when DISABLED is set.
title = """<h1 align="center">OpenAI-O1-Preview: Research Preview (Short-Term Availability)</h1>"""
if DISABLED:
    title = """<h1 align="center" style="color:red">This app has reached OpenAI's usage limit. Please check back tomorrow.</h1>"""
# Explanatory blurb. The last sentence previously named "a gpt-4 turbo LLM",
# which contradicted the model this app actually queries (o1-preview).
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
```
User: <utterance>
Assistant: <utterance>
User: <utterance>
Assistant: <utterance>
...
```
In this app, you can explore the outputs of an OpenAI o1-preview LLM.
"""

# Default Gradio theme with a green primary hue.
theme = gr.themes.Default(primary_hue="green")

# Build the page: headings, a chat area that starts hidden, and a consent
# panel that is visible first. Event wiring and the launch call follow.
with gr.Blocks(css = """#col_container { margin-left: auto; margin-right: auto;}
                #chatbot {height: 520px; overflow: auto;}""",
              theme=theme) as demo:
    gr.HTML(title)
    gr.HTML("""<h3 align="center" style="color: red;">If this app doesn't respond, consider trying our O1-mini app:<br/><a href="https://huggingface.co/spaces/yuntian-deng/o1mini">https://huggingface.co/spaces/yuntian-deng/o1mini</a></h3>""")

    #gr.HTML('''<center><a href="https://huggingface.co/spaces/ysharma/ChatGPT4?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space and run securely with your OpenAI API Key</center>''')
    # Main chat area: created with visible=False and revealed by enable_inputs.
    with gr.Column(elem_id = "col_container", visible=False) as main_block:
        #GPT4 API Key is provided by Huggingface 
        #openai_api_key = gr.Textbox(type='password', label="Enter only your GPT4 OpenAI API key here")
        chatbot = gr.Chatbot(elem_id='chatbot') #c
        inputs = gr.Textbox(placeholder= "Hi there!", label= "Type an input and press Enter") #t
        # Per-session flat history list passed to and returned from predict.
        state = gr.State([]) #s
        with gr.Row():
            with gr.Column(scale=7):
                # Submit button; not shown at all when the app is DISABLED.
                b1 = gr.Button(visible=not DISABLED) #.style(full_width=True)
            with gr.Column(scale=3):
                # Receives the fourth value yielded by predict.
                server_status_code = gr.Textbox(label="Status code from OpenAI server", )
    
        #inputs, top_p, temperature, top_k, repetition_penalty
        with gr.Accordion("Parameters", open=False):
            top_p = gr.Slider( minimum=-0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)",)
            temperature = gr.Slider( minimum=-0, maximum=5.0, value=1.0, step=0.1, interactive=True, label="Temperature",)
            #top_k = gr.Slider( minimum=1, maximum=50, value=4, step=1, interactive=True, label="Top-k",)
            #repetition_penalty = gr.Slider( minimum=0.1, maximum=3.0, value=1.03, step=0.01, interactive=True, label="Repetition Penalty", )
            # Hidden turn counter, read and written by predict.
            chat_counter = gr.Number(value=0, visible=False, precision=0)
    
    # Consent panel: visible at start, hidden once enable_inputs runs.
    with gr.Column(elem_id = "user_consent_container") as user_consent_block:
        # Get user consent
        # Hidden checkbox that receives the result of the JS confirm() dialog;
        # its change event is what swaps the two panels.
        accept_checkbox = gr.Checkbox(visible=False)
        js = "(x) => confirm('By clicking \"OK\", I agree that my data may be published or shared.')"
        with gr.Accordion("User Consent for Data Collection, Use, and Sharing", open=True):
            gr.HTML("""
            <div>
                <p>By using our app, which is powered by OpenAI's API, you acknowledge and agree to the following terms regarding the data you provide:</p>
                <ol>
                    <li><strong>Collection:</strong> We may collect information, including the inputs you type into our app, the outputs generated by OpenAI's API, and certain technical details about your device and connection (such as browser type, operating system, and IP address) provided by your device's request headers.</li>
                    <li><strong>Use:</strong> We may use the collected data for research purposes, to improve our services, and to develop new products or services, including commercial applications, and for security purposes, such as protecting against unauthorized access and attacks.</li>
                    <li><strong>Sharing and Publication:</strong> Your data, including the technical details collected from your device's request headers, may be published, shared with third parties, or used for analysis and reporting purposes.</li>
                    <li><strong>Data Retention:</strong> We may retain your data, including the technical details collected from your device's request headers, for as long as necessary.</li>
                </ol>
                <p>By continuing to use our app, you provide your explicit consent to the collection, use, and potential sharing of your data as described above. If you do not agree with our data collection, use, and sharing practices, please do not use our app.</p>
            </div>
            """)
            accept_button = gr.Button("I Agree")

        def enable_inputs():
            """Return updates that hide the first output and show the second."""
            return gr.update(visible=False), gr.update(visible=True)

    # No Python function runs on click (fn is None): the JS snippet's return
    # value is written into the hidden checkbox.
    accept_button.click(None, None, accept_checkbox, js=js, queue=False)
    # When the checkbox value changes, hide the consent panel and show the chat.
    accept_checkbox.change(fn=enable_inputs, inputs=[], outputs=[user_consent_block, main_block], queue=False)

    # Enter in the textbox and a click on the button are wired identically:
    # reset_textbox (registered with queue=False) clears and locks the inputs,
    # and predict streams the reply and re-enables them with its last yield.
    inputs.submit(reset_textbox, [], [inputs, b1], queue=False)
    inputs.submit(predict, [inputs, top_p, temperature, chat_counter, chatbot, state], [chatbot, state, chat_counter, server_status_code, inputs, b1],)  #openai_api_key
    b1.click(reset_textbox, [], [inputs, b1], queue=False)
    b1.click(predict, [inputs, top_p, temperature, chat_counter, chatbot, state], [chatbot, state, chat_counter, server_status_code, inputs, b1],)  #openai_api_key
             
    # Queue settings: max_size of 10, concurrency limit from NUM_THREADS,
    # api_open=False; launched with share=False.
    demo.queue(max_size=10, default_concurrency_limit=NUM_THREADS, api_open=False).launch(share=False)