import gradio as gr
import os
import json
import logging
import numpy as np
from utils import (PromptTemplate, api_configs, setup_logging)
from data_loader import load_data
from evaluate import evaluate
from main import SwiftSage, run_test, run_benchmark
import multiprocessing


def solve_problem(problem, max_iterations, reward_threshold, swift_model_id, sage_model_id, reward_model_id, use_retrieval, start_with_sage):
    # The UI passes numeric settings as strings; convert them here.
    max_iterations = int(max_iterations)
    reward_threshold = int(reward_threshold)

    # Configuration for each LLM (Swift, Sage, and the feedback/reward model).
    swift_config = {
        "model_id": swift_model_id,
        "api_config": api_configs['Together']
    }

    reward_config = {
        "model_id": reward_model_id,
        "api_config": api_configs['Together']
    }

    sage_config = {
        "model_id": sage_model_id,
        "api_config": api_configs['Together']
    }

    # Specify the path to the prompt templates.
    prompt_template_dir = './prompt_templates'

    dataset = []
    embeddings = []  # TODO: for retrieval augmentation (not implemented yet)

    s2 = SwiftSage(
        dataset,
        embeddings,
        prompt_template_dir,
        swift_config,
        sage_config,
        reward_config,
        use_retrieval=use_retrieval,
        start_with_sage=start_with_sage,
    )

    reasoning, solution = s2.solve(problem, max_iterations, reward_threshold)
    # Strip the boilerplate prefix from the code-execution answer.
    solution = solution.replace("Answer (from running the code):\n ", " ")
    return reasoning, solution


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # gr.Markdown("## SwiftSage: A Multi-Agent Framework for Reasoning")
    # Use HTML to center the title.
    gr.HTML("""
        <div style="text-align: center;">
            <h1>SwiftSage: A Multi-Agent Framework for Reasoning</h1>
        </div>
    """)

    with gr.Row():
        swift_model_id = gr.Textbox(label="😄 Swift Model ID", value="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo")
        reward_model_id = gr.Textbox(label="🤔 Feedback Model ID", value="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo")
        sage_model_id = gr.Textbox(label="😎 Sage Model ID", value="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo")

    # The following two fields should have a smaller width.
    with gr.Accordion(label="⚙️ Advanced Options", open=False):
        with gr.Row():
            with gr.Column():
                max_iterations = gr.Textbox(label="Max Iterations", value="5")
                reward_threshold = gr.Textbox(label="Reward Threshold", value="8")
            # TODO: add top-p and temperature controls for each module.
            with gr.Column():
                top_p_swift = gr.Textbox(label="Top-p for Swift", value="0.9")
                temperature_swift = gr.Textbox(label="Temperature for Swift", value="0.7")
            with gr.Column():
                top_p_sage = gr.Textbox(label="Top-p for Sage", value="0.9")
                temperature_sage = gr.Textbox(label="Temperature for Sage", value="0.7")
            with gr.Column():
                top_p_reward = gr.Textbox(label="Top-p for Feedback", value="0.9")
                temperature_reward = gr.Textbox(label="Temperature for Feedback", value="0.7")

    use_retrieval = gr.Checkbox(label="Use Retrieval Augmentation", value=False, visible=False)
    start_with_sage = gr.Checkbox(label="Start with Sage", value=False, visible=False)

    problem = gr.Textbox(label="Input your problem", value="How many letter r are there in the sentence 'My strawberry is so ridiculously red.'?", lines=2)
    solve_button = gr.Button("🚀 Solve Problem")
    reasoning_output = gr.Textbox(label="Reasoning steps with Code", interactive=False)
    solution_output = gr.Textbox(label="Final answer", interactive=False)

    solve_button.click(
        solve_problem,
        inputs=[problem, max_iterations, reward_threshold, swift_model_id, sage_model_id, reward_model_id, use_retrieval, start_with_sage],
        outputs=[reasoning_output, solution_output]
    )

if __name__ == '__main__':
    multiprocessing.set_start_method('spawn')
    demo.launch(share=False, show_api=False)