import os
import gradio as gr
from openai import OpenAI
from optillm.cot_reflection import cot_reflection
from optillm.rto import round_trip_optimization
from optillm.z3_solver import Z3SolverSystem
from optillm.self_consistency import advanced_self_consistency_approach
from optillm.rstar import RStar
from optillm.plansearch import plansearch
from optillm.leap import leap
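
# Read the OpenRouter key from the environment; API calls will fail with an
# authentication error if OPENROUTER_API_KEY is not set.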
API_KEY = os.environ.get("OPENROUTER_API_KEY")
def respond(
    message,
    history: list[tuple[str, str]],
    model,
    approach,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
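    """Route one chat turn through the selected optillm approach and return
    the final response text. The max_tokens / temperature / top_p inputs are
    only consumed by the commented-out streaming path below.
    """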
    client = OpenAI(api_key=API_KEY, base_url="https://openrouter.ai/api/v1")

    # Rebuild the full conversation; note that the optillm approaches below
    # consume only the system message and the latest user message.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
    if approach == "rto":
        final_response = round_trip_optimization(system_message, message, client, model)
    elif approach == "z3":
        z3_solver = Z3SolverSystem(system_message, client, model)
        final_response = z3_solver.process_query(message)
    elif approach == "self_consistency":
        final_response = advanced_self_consistency_approach(system_message, message, client, model)
    elif approach == "rstar":
        rstar = RStar(system_message, client, model)
        final_response = rstar.solve(message)
    elif approach == "cot_reflection":
        final_response = cot_reflection(system_message, message, client, model)
    elif approach == "plansearch":
        final_response = plansearch(system_message, message, client, model)
    elif approach == "leap":
        final_response = leap(system_message, message, client, model)
    else:
        final_response = f"Unknown approach: {approach}"
    return final_response
# Streaming loop left over from the Gradio ChatInterface template; it was
# written for huggingface_hub.InferenceClient and does not run against the
# OpenAI client used above:
# for message in client.chat_completion(
#     messages,
#     max_tokens=max_tokens,
#     stream=True,
#     temperature=temperature,
#     top_p=top_p,
# ):
#     token = message.choices[0].delta.content
#     response += token
#     yield response
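# A rough streaming equivalent with the OpenAI client would look like the
# sketch below (kept commented out; it assumes the OpenRouter endpoint
# accepts the standard chat-completions streaming parameters):
#
# stream = client.chat.completions.create(
#     model=model,
#     messages=messages,
#     max_tokens=max_tokens,
#     temperature=temperature,
#     top_p=top_p,
#     stream=True,
# )
# response = ""
# for chunk in stream:
#     response += chunk.choices[0].delta.content or ""
#     yield response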
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            [
                "nousresearch/hermes-3-llama-3.1-405b:free",
                "meta-llama/llama-3.1-8b-instruct:free",
                "qwen/qwen-2-7b-instruct:free",
                "google/gemma-2-9b-it:free",
                "mistralai/mistral-7b-instruct:free",
            ],
            value="nousresearch/hermes-3-llama-3.1-405b:free",
            label="Model",
            info="Choose the base model",
        ),
        gr.Dropdown(
            ["leap", "plansearch", "rstar", "cot_reflection", "rto", "self_consistency", "z3"],
            value="cot_reflection",
            label="Approach",
            info="Choose the approach",
        ),
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()
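
# To run locally (assumes gradio, openai, and optillm are installed):
#   OPENROUTER_API_KEY=<your key> python app.py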