Spaces:

vetrovvlad
/

protobench

Sleeping

protobench / app.py

vtrv.vls

Added header and banner

dd9842f 7 months ago

12.2 kB

	import gradio
	import argparse
	import os
	import boto3
	import pandas as pd
	from copy import copy
	from random import choice

	import queue

	from constants import css, js_code, js_light, BANNER
	from utils import model_response, clear_chat
	from models import get_tinyllama, get_qwen2ins1b, GigaChat, response_gigachat, response_qwen2ins1b, response_tinyllama

	# Shared module state. INIT_MODELS maps a model name to its loaded object
	# (or None once evicted); CURRENT_MODELS tracks the names registered by
	# model_manager.
	INIT_MODELS = {}
	S3_SESSION = None
	HEADER_MD = None
	ABOUT_MD = None
	CURRENT_MODELS = queue.LifoQueue()

	# Loader callables, keyed by the model name shown in the UI.
	MODEL_LIB = {
	    "TINYLLAMA": get_tinyllama,
	    "QWEN2INS1B": get_qwen2ins1b,
	    "RUBASE": GigaChat.get_giga,
	}
	# Generation callables, keyed by the same names as MODEL_LIB.
	GEN_LIB = {
	    "TINYLLAMA": response_tinyllama,
	    "QWEN2INS1B": response_qwen2ins1b,
	    "RUBASE": response_gigachat,
	}
	# Selectable model names, in MODEL_LIB insertion order.
	MODEL_LIST = list(MODEL_LIB)

	# Stylesheet handed to gradio.Blocks(css=...) in build_demo: shrinks the text
	# of <span> elements inside the element whose id is "small".
	# NOTE: this assignment rebinds the `css` name imported from `constants` above,
	# so the imported value is not the one used by build_demo.
	css = """
	#small span{
	font-size: 0.7em;
	}
	"""

	async def model_gen(
	    content,
	    chat_history,
	    model_name: str,
	    top_p,
	    temp,
	    max_tokens,
	    no_context=False
	):
	    """Answer a newly submitted prompt with the selected model.

	    Args:
	        content: Prompt text from the input box; may be ``None`` or empty.
	        chat_history: Current chatbot history for this side of the arena.
	        model_name: Key into ``MODEL_LIB`` / ``GEN_LIB``.
	        top_p: Forwarded to the generator as ``top_p``.
	        temp: Forwarded to the generator as ``temperature``.
	        max_tokens: Forwarded to the generator as ``max_tokens``.
	        no_context: When true, an empty history is sent instead of
	            ``chat_history``.

	    Returns:
	        ``('', [])`` when the prompt is empty, otherwise whatever
	        ``model_response`` returns.
	    """
	    # Reject empty prompts first so that no model gets loaded (or evicted)
	    # for a request that will not generate anything.
	    if not content:
	        return '', []

	    model_manager(model_name, MODEL_LIB, 3)

	    history = [] if no_context else chat_history
	    generation_params = {
	        "top_p": top_p,
	        "temperature": temp,
	        "max_tokens": max_tokens,
	    }

	    res = await model_response(
	        content,
	        history,
	        S3_SESSION,
	        INIT_MODELS,
	        GEN_LIB,
	        model_name,
	        generation_params
	    )

	    return res

	async def model_regen(
	    content,
	    chat_history,
	    model_name: str,
	    top_p,
	    temp,
	    max_tokens,
	    no_context=False
	):
	    """Regenerate the answer to the last prompt in ``chat_history``.

	    Args:
	        content: Current textbox value; ignored, the prompt is taken from the
	            last history entry instead.
	        chat_history: Chatbot history as a list of ``[prompt, answer]`` turns.
	        model_name: Key into ``MODEL_LIB`` / ``GEN_LIB``.
	        top_p: Forwarded to the generator as ``top_p``.
	        temp: Forwarded to the generator as ``temperature``.
	        max_tokens: Forwarded to the generator as ``max_tokens``.
	        no_context: When true, earlier turns are not sent along with the
	            prompt.

	    Returns:
	        ``('', [])`` when there is nothing to regenerate, otherwise whatever
	        ``model_response`` returns.
	    """
	    # Nothing to regenerate for a missing OR empty history (an empty list
	    # previously crashed on chat_history[-1]); bail out before loading a model.
	    if not chat_history:
	        return '', []

	    # The prompt is always the first element of the last turn. The previous
	    # no_context branch replaced the history with that single turn and then
	    # indexed into it again, which picked a character of the answer instead.
	    last_prompt = copy(chat_history[-1][0])
	    earlier_turns = [] if no_context else chat_history[:-1]

	    model_manager(model_name, MODEL_LIB, 3)

	    res = await model_response(
	        last_prompt,
	        earlier_turns,
	        S3_SESSION,
	        INIT_MODELS,
	        GEN_LIB,
	        model_name,
	        {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}
	    )

	    return res

	def model_manager(
	    add_model,
	    model_lib,
	    max_models=3
	):
	    """Make sure ``add_model`` is loaded into ``INIT_MODELS``.

	    Args:
	        add_model: Name of the model to make available.
	        model_lib: Mapping from model name to a zero-argument loader.
	        max_models: Maximum number of names kept registered in
	            ``CURRENT_MODELS`` before older entries are dropped.

	    Raises:
	        KeyError: If ``add_model`` is not a key of ``model_lib``.
	    """
	    # Already loaded: reuse it. Reloading and re-registering on every request
	    # filled the queue with duplicates and could evict a model that another
	    # in-flight request was still using.
	    if INIT_MODELS.get(add_model) is not None:
	        return

	    # Free slots before loading. The emptiness check keeps get() from blocking
	    # forever when max_models <= 0.
	    # NOTE: CURRENT_MODELS is a LifoQueue, so get() drops the most recently
	    # registered name, not the oldest one.
	    while not CURRENT_MODELS.empty() and CURRENT_MODELS.qsize() >= max_models:
	        model_del = CURRENT_MODELS.get()
	        INIT_MODELS[model_del] = None

	    # Register the name only after the loader succeeded, so a failed load
	    # does not leave a queue entry without a model behind it.
	    INIT_MODELS[add_model] = model_lib[add_model]()
	    CURRENT_MODELS.put(add_model)

	def tab_online_arena():
	    """Lay out the side-by-side model arena and wire up its events.

	    Creates a model picker and chat window per side, a shared prompt box,
	    voting buttons, generation parameters and the clear/regenerate controls.
	    Must be called inside an active ``gradio.Blocks`` context.
	    """
	    # One column per side: model picker on top of its chat window.
	    with gradio.Row():
	        with gradio.Column():
	            model_left = gradio.Dropdown(
	                MODEL_LIST,
	                value=choice(MODEL_LIST),
	                interactive=True,
	                multiselect=False,
	                label="Left model",
	            )
	            chatbot_left = gradio.Chatbot()
	        with gradio.Column():
	            model_right = gradio.Dropdown(
	                MODEL_LIST,
	                value=choice(MODEL_LIST),
	                interactive=True,
	                multiselect=False,
	                label="Right model",
	            )
	            chatbot_right = gradio.Chatbot()

	    with gradio.Row():
	        msg = gradio.Textbox(label='Prompt', placeholder='Put your prompt here')

	    # Voting buttons; no handlers are attached to them in this function.
	    with gradio.Row():
	        for vote_label in ('Both Good', 'Left Better', 'Right Better', 'Both Bad'):
	            gradio.Button(vote_label)

	    with gradio.Row():
	        with gradio.Accordion("Parameters", open=False):
	            no_context = gradio.Checkbox(label="No context", value=False)
	            top_p = gradio.Slider(
	                label='Top P', minimum=0, maximum=1, value=1, step=0.05, interactive=True
	            )
	            temp = gradio.Slider(
	                label='Temperature', minimum=0, maximum=1, value=0.7, step=0.05, interactive=True
	            )
	            max_tokens = gradio.Slider(
	                label='Max ouput tokens', minimum=1, maximum=2048, value=256, step=1, interactive=True
	            )

	    # Inputs shared by every generation call, in the order the handlers expect.
	    sampling_inputs = [top_p, temp, max_tokens, no_context]

	    with gradio.Row():
	        gradio.ClearButton([msg, chatbot_left, chatbot_right], value='Clear history')
	        regen_left = gradio.Button(value='Regenerate left answer')
	        regen_right = gradio.Button(value='Regenerate right answer')
	        for regen_button, picker, chat in (
	            (regen_left, model_left, chatbot_left),
	            (regen_right, model_right, chatbot_right),
	        ):
	            regen_button.click(
	                model_regen,
	                [msg, chat, picker, *sampling_inputs],
	                [msg, chat],
	            )

	    sides = ((model_left, chatbot_left), (model_right, chatbot_right))
	    with gradio.Blocks():
	        # Switching a model resets the prompt box and that side's chat.
	        for picker, chat in sides:
	            picker.change(clear_chat, [], [msg, chat])
	        # Submitting the prompt queries both sides, left first.
	        for picker, chat in sides:
	            msg.submit(
	                model_gen,
	                [msg, chat, picker, *sampling_inputs],
	                [msg, chat],
	            )

	def tab_leaderboard():
	    """Render the leaderboard sub-tabs, each showing the same placeholder table.

	    Must be called inside an active ``gradio.Blocks`` context.
	    """
	    scores = pd.DataFrame({
	        "Model": ['A', 'B', 'C'],
	        "Test 1": [0, 1, 0],
	        "Test 2": [1, 0, 1],
	    })

	    def highlight_cols(x):
	        """Return a copy of ``x`` whose 'Model' column holds a green-text style."""
	        styled = x.copy()
	        styled[['Model']] = 'color: green'
	        return styled

	    # highlight_cols is not applied yet; the plain frame is displayed as is.

	    tab_titles = ("Autogen Metrics", "Autometrics", "SBS metrics", "Arena ELO rating")
	    for tab_id, title in enumerate(tab_titles):
	        with gradio.TabItem(title, id=tab_id):
	            with gradio.Blocks():
	                gradio.DataFrame(scores)

	def tab_offline_arena():
	# Builds the offline side-by-side evaluation tab: filters for models, task
	# types and per-task criteria, a "History" accordion for the sampled example,
	# voting buttons and a reasoning box.
	# NOTE(review): the nesting of the `with` blocks was lost in this copy of the
	# file - confirm the intended layout against the original before editing.
	global MODEL_LIST
	with gradio.Row():
	with gradio.Column(scale=1):
	with gradio.Accordion("Choose models to sample from", open=False):
	model_options = MODEL_LIST
	# Every model in MODEL_LIST starts out selected.
	selected_models = gradio.CheckboxGroup(model_options, info="", value=model_options, show_label=False, interactive=True)
	clear_button = gradio.Button("Clear", scale=1)
	# Clear selected_models: the handler returns an update payload setting its value to [].
	clear_button.click(lambda: {selected_models: {"value": [], "__type__": "update"}}, inputs=[], outputs=[selected_models])
	with gradio.Column(scale=1):
	with gradio.Accordion("Choose task types to sample from", open=False):
	# Task names are hard-coded here; all three start out selected.
	select_tasks = gradio.CheckboxGroup(['Task 1', "Task 2", "Task 3"], info="", value=['Task 1', "Task 2", "Task 3"], show_label=False, interactive=True)
	clear_task_button = gradio.Button("Clear", scale=1)
	# Clear select_tasks the same way.
	clear_task_button.click(lambda: {select_tasks: {"value": [], "__type__": "update"}}, inputs=[], outputs=[select_tasks])
	btn_show_history = gradio.Button("Click to get sample and models' ouputs")
	with gradio.Column(scale=1):
	with gradio.Accordion("Choose criteria to sample", open=False):
	with gradio.Row():
	# One tab per task, each with its own hard-coded criteria checkbox group.
	with gradio.Tab("Task 1"):
	select_criteria_1 = gradio.CheckboxGroup(['Criterion 1', "Criterion 2", "Criterion 3"], info="", value=['Criterion 1', "Criterion 2", "Criterion 3"], show_label=False, interactive=True)
	with gradio.Tab("Task 2"):
	select_criteria_2 = gradio.CheckboxGroup(['Criterion 1', "Criterion 2", "Criterion 3"], info="", value=['Criterion 1', "Criterion 2", "Criterion 3"], show_label=False, interactive=True)
	with gradio.Tab("Task 3"):
	select_criteria_3 = gradio.CheckboxGroup(['Criterion 1', "Criterion 2", "Criterion 3"], info="", value=['Criterion 1', "Criterion 2", "Criterion 3"], show_label=False, interactive=True)

	clear_criteria_button = gradio.Button("Clear", scale=1)
	# Clear all three criteria checkbox groups with a single click.
	clear_criteria_button.click(lambda: {select_criteria_1: {"value": [], "__type__": "update"},
	select_criteria_2: {"value": [], "__type__": "update"},
	select_criteria_3: {"value": [], "__type__": "update"}},
	inputs=[], outputs=[select_criteria_1, select_criteria_2, select_criteria_3])

	# Hard-coded placeholder data shown when btn_show_history is clicked.
	chatbot_data = [['hey', 'Hey!'], ["are we testing something?", None]]
	text_data = ["Are we?", "Indeed we are."]
	eval_text = '1. Twist it\n2. Bop it\n3. Crank it'

	with gradio.Accordion("History", open=False) as acc_history:
	with gradio.Row():
	# The two columns containing only `pass` hold no components.
	# NOTE(review): presumably spacers that centre the chatbot - confirm.
	with gradio.Column():
	pass
	with gradio.Column(scale=0.8):
	# elem_id="small" matches the `#small span` rule in the module-level css.
	chatbot_history = gradio.Chatbot(container=True, elem_id="small")
	with gradio.Column():
	pass
	with gradio.Row():
	model_a = gradio.Text(label='Model A')
	model_b = gradio.Text(label='Model B')
	with gradio.Row():
	eval_guide = gradio.Text('Get samples and outputs in order to generate guide', label='Evaluation guide')
	with gradio.Row():
	# Voting buttons; no handlers are attached to them in this function.
	both_good = gradio.Button('Both Good')
	left_better = gradio.Button('Left Better')
	right_better = gradio.Button('Right Better')
	both_bad = gradio.Button('Both Bad')

	with gradio.Row():
	with gradio.Column():
	reason = gradio.Textbox(label='Reasoning', placeholder='Put your reasoning here...', lines=5)
	with gradio.Column():
	# Read-only status text; nothing in this function updates it.
	gradio.Textbox(value='You have chosen: None', interactive=False, show_label=False)
	# submit_next and skip_next have no handlers attached in this function.
	submit_next = gradio.Button("Submit your evaluation and get next")
	skip_next = gradio.Button("Skip this example and get next one")

	# Clicking the sample button opens the History accordion and fills the chatbot,
	# both model text boxes and the evaluation guide with the placeholder data above.
	btn_show_history.click(lambda: (gradio.Accordion("History", open=True), chatbot_data, text_data[0], text_data[1], eval_text), inputs=[], outputs=[acc_history, chatbot_history, model_a, model_b, eval_guide])


	def build_demo():
	    """Assemble the top-level app: banner, header and the four main tabs.

	    Returns:
	        The constructed ``gradio.Blocks`` object; the caller launches it.
	    """
	    soft_theme = gradio.themes.Soft(text_size=gradio.themes.sizes.text_lg)

	    # (title, renderer) pairs; each tab's id is its position in this tuple.
	    tab_renderers = (
	        ("🐼 MERA leaderboard", tab_leaderboard),
	        ("🆚 SBS by categories and criteria", tab_offline_arena),
	        ("🥊 Model arena", tab_online_arena),
	        ("💪 About MERA", lambda: gradio.Markdown(ABOUT_MD)),
	    )

	    with gradio.Blocks(theme=soft_theme, css=css) as demo:
	        gradio.HTML(BANNER, elem_id="banner")
	        gradio.Markdown(HEADER_MD)
	        with gradio.Tabs():
	            for tab_id, (title, render) in enumerate(tab_renderers):
	                with gradio.TabItem(title, id=tab_id):
	                    render()
	    return demo

	if __name__ == "__main__":
	    # Script entry point: parse flags, load the markdown shown in the UI,
	    # try to open the S3 client, then build and launch the app.
	    parser = argparse.ArgumentParser()
	    parser.add_argument("--share", action="store_true")
	    args = parser.parse_args()

	    # Read with an explicit encoding so the result does not depend on the
	    # host locale's default.
	    with open("header.md", "r", encoding="utf-8") as f:
	        HEADER_MD = f.read()

	    with open("about.md", "r", encoding="utf-8") as f:
	        ABOUT_MD = f.read()

	    # S3 is best-effort: on failure S3_SESSION keeps its previous value and the
	    # app still starts. Only Exception is caught so Ctrl-C / SystemExit still
	    # propagate, and the cause is reported instead of being swallowed.
	    try:
	        session = boto3.session.Session()
	        S3_SESSION = session.client(
	            service_name='s3',
	            endpoint_url=os.getenv('S3_ENDPOINT'),
	            aws_access_key_id=os.getenv('S3_ACCESS_KEY'),
	            aws_secret_access_key=os.getenv('S3_SECRET_KEY'),
	        )
	    except Exception as exc:
	        print(f'Failed to start s3 session: {exc!r}')

	    app = build_demo()
	    # NOTE(review): allowed_paths=["."] lets the server serve files from the
	    # working directory - confirm nothing sensitive lives there.
	    app.launch(share=args.share, height=3000, width="110%", allowed_paths=["."])

	# demo = gradio.Interface(fn=gen, inputs="text", outputs="text")
	# demo.launch()