import os
import gradio as gr
from openai import OpenAI
from optillm.cot_reflection import cot_reflection
from optillm.rto import round_trip_optimization
from optillm.z3_solver import Z3SolverSystem
from optillm.self_consistency import advanced_self_consistency_approach
from optillm.rstar import RStar
from optillm.plansearch import plansearch
from optillm.leap import leap
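
# Read the OpenRouter key from the environment; API calls will fail with an
# authentication error if OPENROUTER_API_KEY is not set.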
API_KEY = os.environ.get("OPENROUTER_API_KEY")
def respond(
    message,
    history: list[tuple[str, str]],
    model,
    approach,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
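    """Route one chat turn through the selected optillm approach and return
    the final response text. The max_tokens / temperature / top_p inputs are
    only consumed by the commented-out streaming path below.
    """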
    client = OpenAI(api_key=API_KEY, base_url="https://openrouter.ai/api/v1")

    # Rebuild the full conversation; note that the optillm approaches below
    # consume only the system message and the latest user message.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
    if approach == "rto":
        final_response = round_trip_optimization(system_message, message, client, model)
    elif approach == "z3":
        z3_solver = Z3SolverSystem(system_message, client, model)
        final_response = z3_solver.process_query(message)
    elif approach == "self_consistency":
        final_response = advanced_self_consistency_approach(system_message, message, client, model)
    elif approach == "rstar":
        rstar = RStar(system_message, client, model)
        final_response = rstar.solve(message)
    elif approach == "cot_reflection":
        final_response = cot_reflection(system_message, message, client, model)
    elif approach == "plansearch":
        final_response = plansearch(system_message, message, client, model)
    elif approach == "leap":
        final_response = leap(system_message, message, client, model)
    else:
        final_response = f"Unknown approach: {approach}"
    return final_response
# Streaming loop left over from the Gradio ChatInterface template; it was
# written for huggingface_hub.InferenceClient and does not run against the
# OpenAI client used above:
# for message in client.chat_completion(
#     messages,
#     max_tokens=max_tokens,
#     stream=True,
#     temperature=temperature,
#     top_p=top_p,
# ):
#     token = message.choices[0].delta.content
#     response += token
#     yield response
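# A rough streaming equivalent with the OpenAI client would look like the
# sketch below (kept commented out; it assumes the OpenRouter endpoint
# accepts the standard chat-completions streaming parameters):
#
# stream = client.chat.completions.create(
#     model=model,
#     messages=messages,
#     max_tokens=max_tokens,
#     temperature=temperature,
#     top_p=top_p,
#     stream=True,
# )
# response = ""
# for chunk in stream:
#     response += chunk.choices[0].delta.content or ""
#     yield response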
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            [
                "nousresearch/hermes-3-llama-3.1-405b:free",
                "meta-llama/llama-3.1-8b-instruct:free",
                "qwen/qwen-2-7b-instruct:free",
                "google/gemma-2-9b-it:free",
                "mistralai/mistral-7b-instruct:free",
            ],
            value="nousresearch/hermes-3-llama-3.1-405b:free",
            label="Model",
            info="Choose the base model",
        ),
        gr.Dropdown(
            ["leap", "plansearch", "rstar", "cot_reflection", "rto", "self_consistency", "z3"],
            value="cot_reflection",
            label="Approach",
            info="Choose the approach",
        ),
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()
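
# To run locally (assumes gradio, openai, and optillm are installed):
#   OPENROUTER_API_KEY=<your key> python app.py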