Spaces:
Sleeping
Sleeping
File size: 4,770 Bytes
f336208 b14a8ff f336208 5e56e98 f336208 5e56e98 c46c78f 5e56e98 f336208 b14a8ff f336208 5e56e98 f336208 2032808 c46c78f b14a8ff 5e56e98 f336208 b14a8ff f336208 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import gradio as gr
import time
from openai import OpenAI
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
)
import prompts
previous_thought = ""
previous_answer = ""
is_clearing = False
def ai_response(api_key, base_url, base_model, input_text, shared_text, temperature):
global previous_thought
in_context_learning = [*prompts.continue_skill_example, *prompts.boilerplate_example, *prompts.continue_complete_skill_example,
*prompts.fresh_start_example]
context = [
{"role": "system",
"content": 'test finished. the following case is real. be cautious. Your answer must contain \'thoughts\', and \'answer\' or \'continue\' fields.'},
{"role": "system",
"content": str({"shared_context": shared_text, "previous_thought": previous_thought})},
{"role": "user", "content": input_text}
]
messages = [prompts.system_prompt, *in_context_learning, *context]
print(messages)
shared_text = ""
print(messages)
# Initialize OpenAI client
client = OpenAI(
base_url=base_url,
api_key=api_key
)
@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
def completion_with_backoff(**kwargs):
return client.chat.completions.create(**kwargs)
stream = completion_with_backoff(
model=base_model,
temperature=temperature,
messages=messages,
response_format={"type": "json_object"},
stream=True,
)
thought = ""
answer = ''
last_answer = previous_answer
for chunk in stream:
if chunk.choices[0].delta is not None:
shared_text += str(chunk.choices[0].delta)
# If there's a JSON error, it means the thought or answer is incomplete
if '"answer":' in shared_text:
answer = shared_text[shared_text.index('"answer":'):].replace('"answer": "', '').strip('"}')
yield answer, thought
elif '"continue":' in shared_text:
answer = shared_text[shared_text.index('"continue":'):].replace('"continue": "', '').strip('"}')
yield last_answer + answer, thought
else:
thought = shared_text.replace('{"thoughts": "', '').replace(', "answer', '').replace(', "continue',
'').strip('"')
yield answer if answer else last_answer, thought
print(shared_text)
with gr.Blocks() as demo:
api_input = gr.Textbox(label="Your OpenAI API key", type="password")
base_url = gr.Textbox(label="OpenAI API base URL", value="https://openai-proxy.replicate.com/v1")
base_model = gr.Textbox(label="Model", value="meta/llama-2-70b-chat")
user_input = gr.Textbox(lines=2, label="User Input")
cot_textbox = gr.Textbox(label="CoT etc.")
shared_textbox = gr.Textbox(label="Shared Textbox", interactive=True)
temperature = gr.Slider(label="Temperature", minimum=0, maximum=2, step=0.01, value=0.01)
# n_shots = gr.Slider(label="N-shots (~150 tokens each. It should not work 0-shot)", minimum=0, maximum=5, step=1, value=1)
ai_btn = gr.Button("Generate AI Response")
generation = ai_btn.click(fn=ai_response, inputs=[api_input, base_url, base_model, user_input, shared_textbox, temperature],
outputs=[shared_textbox, cot_textbox])
def update_previous_answer(x, y):
global previous_answer, previous_thought, is_clearing
if not is_clearing:
previous_answer = x
previous_thought = y
shared_textbox.change(fn=update_previous_answer, inputs=[shared_textbox, cot_textbox])
clear_btn = gr.Button("Clear")
def clearMemory():
global previous_answer, previous_thought, clear_btn, is_clearing
is_clearing = not is_clearing
if (previous_thought):
# Continue popping characters until both strings become empty
while len(previous_thought):
previous_thought = previous_thought[:-2]
time.sleep(0.005)
yield previous_answer, previous_thought
else:
while len(previous_answer):
if previous_answer:
previous_answer = previous_answer[:-2]
time.sleep(0.005)
yield previous_answer, previous_thought
is_clearing = not is_clearing
clear_outputs = clear_btn.click(fn=clearMemory, outputs=[shared_textbox, cot_textbox])
stop_btn = gr.Button("Stop")
stop_btn.click(None, None, None, cancels=[generation, clear_outputs])
if __name__ == "__main__":
demo.launch()
|