Upload folder using huggingface_hub

- .env +3 -0
- .pytest_cache/.gitignore +2 -0
- .pytest_cache/CACHEDIR.TAG +4 -0
- .pytest_cache/README.md +8 -0
- .pytest_cache/v/cache/lastfailed +3 -0
- .pytest_cache/v/cache/stepwise +1 -0
- README.md +2 -8
- app.py +5 -0
- app/__init__.py +1 -0
- app/__pycache__/__init__.cpython-310.pyc +0 -0
- app/__pycache__/config.cpython-310.pyc +0 -0
- app/__pycache__/db.cpython-310.pyc +0 -0
- app/__pycache__/init.cpython-310.pyc +0 -0
- app/__pycache__/leaderboard.cpython-310.pyc +0 -0
- app/__pycache__/llm.cpython-310.pyc +0 -0
- app/__pycache__/messages.cpython-310.pyc +0 -0
- app/__pycache__/models.cpython-310.pyc +0 -0
- app/__pycache__/synth.cpython-310.pyc +0 -0
- app/__pycache__/ui.cpython-310.pyc +0 -0
- app/__pycache__/ui_battle.cpython-310.pyc +0 -0
- app/__pycache__/ui_leaderboard.cpython-310.pyc +0 -0
- app/__pycache__/ui_vote.cpython-310.pyc +0 -0
- app/__pycache__/utils.cpython-310.pyc +0 -0
- app/__pycache__/vote.cpython-310.pyc +0 -0
- app/config.py +27 -0
- app/db.py +61 -0
- app/init.py +20 -0
- app/leaderboard.py +52 -0
- app/llm.py +45 -0
- app/messages.py +82 -0
- app/models.py +19 -0
- app/ui.py +12 -0
- app/ui_battle.py +132 -0
- app/ui_leaderboard.py +46 -0
- app/utils.py +6 -0
- app/vote.py +124 -0
- database.db +0 -0
- requirements.txt +3 -0
- scripts/view_db.py +61 -0
.env
ADDED
@@ -0,0 +1,3 @@
+SYNC_DB=true
+DATASET_ID=your-username/llm-arena-db
+SPACE_ID=your-username/llm-arena
.pytest_cache/.gitignore
ADDED
@@ -0,0 +1,2 @@
+# Created by pytest automatically.
+*
.pytest_cache/CACHEDIR.TAG
ADDED
@@ -0,0 +1,4 @@
+Signature: 8a477f597d28d172789f06886806bc55
+# This file is a cache directory tag created by pytest.
+# For information about cache directory tags, see:
+#	https://bford.info/cachedir/spec.html
.pytest_cache/README.md
ADDED
@@ -0,0 +1,8 @@
+# pytest cache directory #
+
+This directory contains data from the pytest's cache plugin,
+which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
+
+**Do not** commit this to version control.
+
+See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.
.pytest_cache/v/cache/lastfailed
ADDED
@@ -0,0 +1,3 @@
+{
+  "GenAI-Arena/arena_elo/simple_test.py": true
+}
.pytest_cache/v/cache/stepwise
ADDED
@@ -0,0 +1 @@
+[]
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
-title:
-
-colorFrom: blue
-colorTo: yellow
+title: darija-chatbot-arena
+app_file: app.py
 sdk: gradio
 sdk_version: 5.9.1
-app_file: app.py
-pinned: false
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,5 @@
+import gradio as gr
+from app.ui import app
+
+if __name__ == "__main__":
+    app.launch(debug=True)
app/__init__.py
ADDED
@@ -0,0 +1 @@
+from .ui import app
app/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (196 Bytes)
app/__pycache__/config.cpython-310.pyc
ADDED
Binary file (576 Bytes)
app/__pycache__/db.cpython-310.pyc
ADDED
Binary file (1.89 kB)
app/__pycache__/init.cpython-310.pyc
ADDED
Binary file (532 Bytes)
app/__pycache__/leaderboard.cpython-310.pyc
ADDED
Binary file (2.08 kB)
app/__pycache__/llm.cpython-310.pyc
ADDED
Binary file (1.58 kB)
app/__pycache__/messages.cpython-310.pyc
ADDED
Binary file (4.94 kB)
app/__pycache__/models.cpython-310.pyc
ADDED
Binary file (387 Bytes)
app/__pycache__/synth.cpython-310.pyc
ADDED
Binary file (4.97 kB)
app/__pycache__/ui.cpython-310.pyc
ADDED
Binary file (583 Bytes)
app/__pycache__/ui_battle.cpython-310.pyc
ADDED
Binary file (3 kB)
app/__pycache__/ui_leaderboard.cpython-310.pyc
ADDED
Binary file (951 Bytes)
app/__pycache__/ui_vote.cpython-310.pyc
ADDED
Binary file (1.9 kB)
app/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (307 Bytes)
app/__pycache__/vote.cpython-310.pyc
ADDED
Binary file (4.16 kB)
app/config.py
ADDED
@@ -0,0 +1,27 @@
+import os
+
+#########################
+# General Configuration #
+#########################
+
+DB_NAME = "database.db"
+DB_PATH = f"/data/{DB_NAME}" if os.path.isdir("/data") else DB_NAME
+
+# LLM Models Configuration
+AVAILABLE_MODELS = {
+    "GPT-4": "gpt4",
+    "Claude-3": "claude3",
+    "Gemini-Pro": "gemini",
+    "Mixtral": "mixtral",
+    "Llama-2": "llama2",
+    # Add more models as needed
+}
+
+# General Configuration
+MAX_PROMPT_LENGTH = 2000
+MIN_PROMPT_LENGTH = 10
+
+# Sync settings
+SYNC_DB = True if os.getenv('SYNC_DB') else False
+DB_DATASET_ID = os.getenv('DATASET_ID')
+SPACE_ID = os.getenv('SPACE_ID')
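
One caveat worth flagging in the sync settings: `SYNC_DB = True if os.getenv('SYNC_DB') else False` treats any non-empty value as truthy, so `SYNC_DB=false` in `.env` would still enable syncing. A minimal sketch of stricter parsing, if that matters for a deployment (an assumption, not part of this commit):

    import os

    # Accept only explicit truthy spellings; "false", "0", or unset all disable sync.
    SYNC_DB = os.getenv("SYNC_DB", "").strip().lower() in ("1", "true", "yes")
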
app/db.py
ADDED
@@ -0,0 +1,61 @@
+import sqlite3
+from .config import *
+import os
+import shutil
+from huggingface_hub import hf_hub_download
+
+def download_db():
+    if not os.path.isfile(DB_PATH):
+        print("Downloading DB...")
+        try:
+            cache_path = hf_hub_download(repo_id=DB_DATASET_ID, repo_type='dataset', filename=DB_NAME)
+            shutil.copyfile(cache_path, DB_PATH)
+            print("Downloaded DB")
+        except Exception as e:
+            print("Error while downloading DB:", e)
+
+def get_db():
+    return sqlite3.connect(DB_PATH)
+
+def create_db():
+    conn = get_db()
+    cursor = conn.cursor()
+
+    # Model table - stores model info and vote counts
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS model (
+            name TEXT UNIQUE,
+            upvote INTEGER DEFAULT 0,
+            downvote INTEGER DEFAULT 0
+        );
+    ''')
+
+    # Vote table - stores individual votes with prompts and responses
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS vote (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            username TEXT,
+            model TEXT,
+            vote INTEGER,
+            prompt TEXT,
+            response TEXT,
+            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        );
+    ''')
+
+    # Battle log table - stores battle outcomes with both responses
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS votelog (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            username TEXT,
+            chosen TEXT,
+            rejected TEXT,
+            prompt TEXT,
+            chosen_response TEXT,
+            rejected_response TEXT,
+            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        );
+    ''')
+
+    conn.commit()
+    cursor.close()
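
For orientation, a minimal sketch of how these helpers fit together on startup (mirroring app/init.py below; the query at the end is illustrative only):

    from app.db import download_db, create_db, get_db

    download_db()   # copy database.db out of the dataset repo if missing locally
    create_db()     # idempotent thanks to CREATE TABLE IF NOT EXISTS

    conn = get_db()                 # short-lived connection per operation
    cursor = conn.cursor()
    cursor.execute("SELECT name, upvote, downvote FROM model")
    print(cursor.fetchall())
    conn.close()
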
app/init.py
ADDED
@@ -0,0 +1,20 @@
+from .config import *
+from .db import *
+from huggingface_hub import CommitScheduler
+from pathlib import Path
+import os
+
+scheduler = None
+
+if SYNC_DB:
+    download_db()
+    # Sync local DB with remote repo every 5 minutes (only if a change is detected)
+    scheduler = CommitScheduler(
+        repo_id=DB_DATASET_ID,
+        repo_type="dataset",
+        folder_path=Path(DB_PATH).parent,
+        every=5,
+        allow_patterns=DB_NAME,
+    )
+
+create_db()
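
Because CommitScheduler pushes the folder from a background thread, writes to database.db should be serialized against snapshots by holding scheduler.lock around commits, as app/vote.py does further down. A condensed sketch of that pattern (record_vote is a hypothetical helper, not part of this commit):

    from app.init import scheduler
    from app.db import get_db

    def record_vote(sql, params):
        conn = get_db()
        conn.execute(sql, params)
        if scheduler:                # None when SYNC_DB is unset
            with scheduler.lock:     # keep a push from seeing a half-written file
                conn.commit()
        else:
            conn.commit()
        conn.close()
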
app/leaderboard.py
ADDED
@@ -0,0 +1,52 @@
+from .config import *
+from .db import *
+from .models import *
+
+import pandas as pd
+
+def get_leaderboard(reveal_prelim=False):
+    print("Getting leaderboard data...")
+    conn = get_db()
+    cursor = conn.cursor()
+
+    sql = '''
+        SELECT name,
+               SUM(CASE WHEN vote = 1 THEN 1 ELSE 0 END) as upvote,
+               SUM(CASE WHEN vote = -1 THEN 1 ELSE 0 END) as downvote
+        FROM model
+        LEFT JOIN vote ON model.name = vote.model
+        GROUP BY name
+    '''
+
+    print("Executing SQL query...")
+    cursor.execute(sql)
+    data = cursor.fetchall()
+    df = pd.DataFrame(data, columns=['name', 'upvote', 'downvote'])
+    df['votes'] = df['upvote'] + df['downvote']
+    print(f"Initial dataframe has {len(df)} models")
+
+    if not reveal_prelim:
+        print(f"Filtering out models with 100 votes or fewer... ({len(df[df['votes'] <= 100])} models will be filtered)")
+        df = df[df['votes'] > 100]  # Minimum vote threshold
+        print(f"After filtering: {len(df)} models remain")
+
+    print(f"Calculating ELO scores for {len(df)} models...")
+    # Calculate ELO scores
+    df['score'] = 1200  # Base ELO
+    for i in range(len(df)):
+        for j in range(len(df)):
+            if i != j:
+                expected_a = 1 / (1 + 10 ** ((df['score'].iloc[j] - df['score'].iloc[i]) / 400))
+                expected_b = 1 / (1 + 10 ** ((df['score'].iloc[i] - df['score'].iloc[j]) / 400))
+                actual_a = df['upvote'].iloc[i] / df['votes'].iloc[i] if df['votes'].iloc[i] > 0 else 0.5
+                actual_b = df['upvote'].iloc[j] / df['votes'].iloc[j] if df['votes'].iloc[j] > 0 else 0.5
+                df.iloc[i, df.columns.get_loc('score')] += 32 * (actual_a - expected_a)
+                df.iloc[j, df.columns.get_loc('score')] += 32 * (actual_b - expected_b)
+
+    df['score'] = round(df['score'])
+    df = df.sort_values(by='score', ascending=False)
+    df['order'] = ['#' + str(i+1) for i in range(len(df))]
+
+    print(f"Returning final leaderboard data with {len(df)} models...")
+    # Return only the columns we want to display
+    return df[['order', 'name', 'score', 'votes']].values.tolist()
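
For reference, the loop above applies the standard Elo expected-score formula, E_a = 1 / (1 + 10^((R_b - R_a) / 400)), with K = 32; since every model starts at 1200, a single pass effectively ranks by raw win rate. A quick worked example of one update (illustrative numbers, not from this database):

    R_a, R_b, K = 1200, 1200, 32
    expected_a = 1 / (1 + 10 ** ((R_b - R_a) / 400))   # 0.5 at equal ratings
    actual_a = 0.65                                    # e.g. a 65% observed win rate
    print(R_a + K * (actual_a - expected_a))           # 1204.8
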
app/llm.py
ADDED
@@ -0,0 +1,46 @@
+import threading
+import gradio as gr  # needed for gr.Error below
+from .config import *
+from .models import *
+from .utils import *
+import random
+
+def generate_response(prompt, model_name):
+    """
+    Replace this with actual API calls to your LLM endpoints
+    """
+    # Placeholder implementation
+    responses = {
+        "gpt4": "This is a simulated GPT-4 response",
+        "claude3": "This is a simulated Claude-3 response",
+        "gemini": "This is a simulated Gemini-Pro response",
+        "mixtral": "This is a simulated Mixtral response",
+        "llama2": "This is a simulated Llama-2 response"
+    }
+    return responses.get(model_name, "Model not found")
+
+def get_responses(prompt, model_a, model_b):
+    results = {}
+
+    def predict_and_store(prompt, model, result_storage):
+        try:
+            if model in AVAILABLE_MODELS:
+                result = generate_response(prompt, AVAILABLE_MODELS[model])
+            else:
+                result = generate_response(prompt, model)
+            result_storage[model] = result
+        except Exception as e:
+            raise gr.Error(f'Unable to generate response: {str(e)}')
+
+    thread1 = threading.Thread(target=predict_and_store, args=(prompt, model_a, results))
+    thread2 = threading.Thread(target=predict_and_store, args=(prompt, model_b, results))
+
+    thread1.start()
+    thread2.start()
+    thread1.join()
+    thread2.join()
+
+    return results[model_a], results[model_b]
+
+def random_models():
+    return random.sample(list(AVAILABLE_MODELS.keys()), 2)
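
generate_response is an explicit placeholder. If it were wired to live endpoints, one option (an assumption, not what this commit ships) is huggingface_hub's InferenceClient; the model ID below is a placeholder:

    from huggingface_hub import InferenceClient

    def generate_response(prompt, model_name):
        # Hypothetical mapping from this repo's short names to hosted model IDs.
        endpoints = {"mixtral": "mistralai/Mixtral-8x7B-Instruct-v0.1"}
        client = InferenceClient(model=endpoints[model_name])
        out = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=512,
        )
        return out.choices[0].message.content
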
app/messages.py
ADDED
@@ -0,0 +1,82 @@
+from .config import *
+
+############
+# Messages #
+############
+
+MUST_BE_LOGGEDIN = "Please login with Hugging Face to participate in the TTS Arena."
+DESCR = """
+# TTS Arena: Benchmarking TTS Models in the Wild
+Vote to help the community find the best available text-to-speech model!
+""".strip()
+BATTLE_INSTR = """
+## Battle
+Choose 2 candidates and vote on which one is better! Currently in beta.
+* Input text (English only) to synthesize audio (or press 🎲 for random text).
+* Listen to the two audio clips, one after the other.
+* Vote on which audio sounds more natural to you.
+"""
+INSTR = """
+## Vote
+* Input text (English only) to synthesize audio (or press 🎲 for random text).
+* Listen to the two audio clips, one after the other.
+* Vote on which audio sounds more natural to you.
+* _Note: Model names are revealed after the vote is cast._
+Note: It may take up to 30 seconds to synthesize audio.
+""".strip()
+request = ""
+if SPACE_ID:
+    request = f"""
+### Request a model
+Please [create a Discussion](https://huggingface.co/spaces/{SPACE_ID}/discussions/new) to request a model.
+"""
+ABOUT = f"""
+## About
+The TTS Arena evaluates leading speech synthesis models. It is inspired by LMsys's [Chatbot Arena](https://chat.lmsys.org/).
+### Motivation
+The field of speech synthesis has long lacked an accurate method to measure the quality of different models. Objective metrics like WER (word error rate) are unreliable measures of model quality, and subjective measures such as MOS (mean opinion score) are typically small-scale experiments conducted with few listeners. As a result, these measurements are generally not useful for comparing two models of roughly similar quality. To address these drawbacks, we are inviting the community to rank models in an easy-to-use interface, and opening it up to the public in order to make both the opportunity to rank models, as well as the results, more easily accessible to everyone.
+### The Arena
+The leaderboard allows a user to enter text, which will be synthesized by two models. After listening to each sample, the user can vote on which model sounds more natural. Due to the risks of human bias and abuse, model names are revealed only after a vote is submitted.
+### Credits
+Thank you to the following individuals who helped make this project possible:
+* VB ([Twitter](https://twitter.com/reach_vb) / [Hugging Face](https://huggingface.co/reach-vb))
+* Clémentine Fourrier ([Twitter](https://twitter.com/clefourrier) / [Hugging Face](https://huggingface.co/clefourrier))
+* Lucain Pouget ([Twitter](https://twitter.com/Wauplin) / [Hugging Face](https://huggingface.co/Wauplin))
+* Yoach Lacombe ([Twitter](https://twitter.com/yoachlacombe) / [Hugging Face](https://huggingface.co/ylacombe))
+* Main Horse ([Twitter](https://twitter.com/main_horse) / [Hugging Face](https://huggingface.co/main-horse))
+* Sanchit Gandhi ([Twitter](https://twitter.com/sanchitgandhi99) / [Hugging Face](https://huggingface.co/sanchit-gandhi))
+* Apolinário Passos ([Twitter](https://twitter.com/multimodalart) / [Hugging Face](https://huggingface.co/multimodalart))
+* Pedro Cuenca ([Twitter](https://twitter.com/pcuenq) / [Hugging Face](https://huggingface.co/pcuenq))
+{request}
+### Privacy statement
+We may store text you enter and generated audio. We store a unique ID for each session. You agree that we may collect, share, and/or publish any data you input for research and/or commercial purposes.
+### License
+Generated audio clips cannot be redistributed and may be used for personal, non-commercial use only.
+Random sentences are sourced from a filtered subset of the [Harvard Sentences](https://www.cs.columbia.edu/~hgs/audio/harvard.html).
+""".strip()
+LDESC = """
+## 🏆 Leaderboard
+Vote to help the community determine the best language models.
+The leaderboard displays models in descending order based on votes cast by the community.
+Important: In order to help keep results fair, the leaderboard hides results by default until the number of votes passes a threshold.
+Tick the `Show preliminary results` checkbox to show models with few votes. Please note that preliminary results may be inaccurate.
+""".strip()
+ABOUT_MD = """
+# 🤖 LLM Arena
+
+A platform for comparing and ranking different Large Language Models through human feedback.
+
+## How it works
+
+1. **Battle Mode**: Compare responses from two different LLMs side-by-side and vote for the better one
+2. **Leaderboard**: See how models rank against each other based on user votes
+3. **Fair Comparison**: Models are randomly selected and anonymized during voting to prevent bias
+
+## Contributing
+
+Want to add a new model? Check out our [GitHub repository](link-to-repo) for instructions.
+
+## License
+
+This project is licensed under MIT License. Individual models may have their own licenses.
+"""
app/models.py
ADDED
@@ -0,0 +1,19 @@
+# Models to include in the leaderboard
+AVAILABLE_MODELS = {
+    "GPT-4": "gpt4",
+    "Claude-3": "claude3",
+    "Gemini-Pro": "gemini",
+    "Mixtral": "mixtral",
+    "Llama-2": "llama2",
+    # Add more models as needed
+}
+
+# Model name mapping for display
+model_names = {
+    "gpt4": "GPT-4",
+    "claude3": "Claude-3",
+    "gemini": "Gemini-Pro",
+    "mixtral": "Mixtral",
+    "llama2": "Llama-2",
+    # Add more mappings as needed
+}
app/ui.py
ADDED
@@ -0,0 +1,12 @@
+import gradio as gr
+from .config import *
+from .messages import *
+from .ui_battle import *
+from .ui_leaderboard import *
+
+with gr.Blocks() as about:
+    gr.Markdown(ABOUT_MD)
+
+with gr.Blocks(css="footer {visibility: hidden}", title="LLM Arena") as app:
+    gr.Markdown(ABOUT_MD)
+    gr.TabbedInterface([battle, leaderboard, about], ['Battle', 'Leaderboard', 'About'])
app/ui_battle.py
ADDED
@@ -0,0 +1,132 @@
+import gradio as gr
+from .config import *
+from .vote import *
+from .messages import *
+from .llm import *
+import random
+
+def disable():
+    return [gr.update(interactive=False), gr.update(interactive=False)]
+def enable():
+    return [gr.update(interactive=True), gr.update(interactive=True)]
+
+with gr.Blocks() as battle:
+    battle_useridstate = gr.State()
+
+    gr.Markdown("## 🤖 LLM Battle\nCompare two AI responses and vote for the better one!")
+
+    with gr.Group():
+        with gr.Row():
+            prompt = gr.Textbox(
+                container=False,
+                show_label=False,
+                placeholder="Enter your prompt here...",
+                lines=3,
+                max_lines=10,
+                scale=9999999,
+                min_width=0
+            )
+            random_prompt = gr.Button('🎲', scale=0, min_width=0, variant='tool')
+
+    btn = gr.Button("Generate Responses", variant='primary')
+
+    with gr.Row(visible=False) as response_row:
+        with gr.Column():
+            with gr.Group():
+                response1 = gr.Textbox(
+                    label="Model A Response",
+                    lines=8,
+                    max_lines=8,
+                    interactive=False
+                )
+                a_better = gr.Button("A is better", variant='primary')
+                model1_name = gr.Textbox(
+                    interactive=False,
+                    show_label=False,
+                    container=False,
+                    value="Vote to reveal model A",
+                    text_align="center",
+                    visible=False
+                )
+        with gr.Column():
+            with gr.Group():
+                response2 = gr.Textbox(
+                    label="Model B Response",
+                    lines=8,
+                    max_lines=8,
+                    interactive=False
+                )
+                b_better = gr.Button("B is better", variant='primary')
+                model2_name = gr.Textbox(
+                    interactive=False,
+                    show_label=False,
+                    container=False,
+                    value="Vote to reveal model B",
+                    text_align="center",
+                    visible=False
+                )
+
+    def generate_responses(prompt):
+        if len(prompt.strip()) < MIN_PROMPT_LENGTH:
+            raise gr.Error(f"Prompt must be at least {MIN_PROMPT_LENGTH} characters")
+        if len(prompt.strip()) > MAX_PROMPT_LENGTH:
+            raise gr.Error(f"Prompt must be less than {MAX_PROMPT_LENGTH} characters")
+
+        model_a, model_b = random_models()
+        resp_a, resp_b = get_responses(prompt, model_a, model_b)
+
+        return [
+            resp_a,  # response1
+            resp_b,  # response2
+            model_a,  # model1_name (actual model identifier)
+            model_b,  # model2_name (actual model identifier)
+            gr.update(visible=True),  # response_row
+            gr.update(interactive=True, visible=True),  # a_better
+            gr.update(interactive=True, visible=True),  # b_better
+            gr.update(visible=False),  # model1_name visibility
+            gr.update(visible=False)  # model2_name visibility
+        ]
+
+    # Event handlers
+    btn.click(
+        fn=generate_responses,
+        inputs=[prompt],
+        outputs=[
+            response1,
+            response2,
+            model1_name,
+            model2_name,
+            response_row,
+            a_better,
+            b_better,
+            model1_name,
+            model2_name
+        ]
+    )
+
+    a_better.click(
+        fn=a_is_better,
+        inputs=[model1_name, model2_name, battle_useridstate, prompt, response1, response2],
+        outputs=[a_better, b_better, model1_name, model2_name]
+    )
+
+    b_better.click(
+        fn=b_is_better,
+        inputs=[model1_name, model2_name, battle_useridstate, prompt, response1, response2],
+        outputs=[a_better, b_better, model1_name, model2_name]
+    )
+
+    def get_random_prompt():
+        prompts = [
+            "What are the key differences between Python and JavaScript?",
+            "Explain quantum computing in simple terms.",
+            "Write a short story about a robot learning to feel emotions.",
+            "What are the pros and cons of remote work?",
+            "Explain how blockchain technology works."
+        ]
+        return random.choice(prompts)
+
+    random_prompt.click(
+        fn=get_random_prompt,
+        outputs=[prompt]
+    )
app/ui_leaderboard.py
ADDED
@@ -0,0 +1,46 @@
+import gradio as gr
+from .config import *
+from .leaderboard import *
+from .messages import *
+
+with gr.Blocks() as leaderboard:
+    gr.Markdown(LDESC)
+
+    df = gr.Dataframe(
+        headers=['Rank', 'Model', 'Score', 'Total Votes'],
+        interactive=False,
+        wrap=True,
+        column_widths=['80px', '200px', '100px', '100px']
+    )
+
+    reloadbtn = gr.Button("🔄 Refresh")
+
+    reveal_prelim = gr.Checkbox(
+        label="Show preliminary results",
+        info="Include models with few votes",
+        value=True
+    )
+
+    def update_leaderboard(reveal_prelim):
+        return get_leaderboard(reveal_prelim)
+
+    # Update on checkbox changes
+    reveal_prelim.change(
+        update_leaderboard,
+        inputs=[reveal_prelim],
+        outputs=[df]
+    )
+
+    # Update on refresh button click
+    reloadbtn.click(
+        update_leaderboard,
+        inputs=[reveal_prelim],
+        outputs=[df]
+    )
+
+    # Initial load
+    leaderboard.load(
+        update_leaderboard,
+        inputs=[reveal_prelim],
+        outputs=[df]
+    )
app/utils.py
ADDED
@@ -0,0 +1,6 @@
+import uuid
+
+def mkuuid(uid):
+    if not uid:
+        uid = uuid.uuid4()
+    return uid
app/vote.py
ADDED
@@ -0,0 +1,124 @@
+from .utils import *
+from .config import *
+from .models import *
+from .db import *
+from .init import *
+
+import gradio as gr
+
+# Vote
+
+def upvote_model(model, uname, prompt="", response=""):
+    print("Establishing database connection for upvoting.")
+    conn = get_db()
+    cursor = conn.cursor()
+    print(f"Updating upvote count for model: {model}")
+    cursor.execute('UPDATE model SET upvote = upvote + 1 WHERE name = ?', (model,))
+    if cursor.rowcount == 0:
+        print(f"No existing entry found for model '{model}'. Inserting new model with upvote=1 and downvote=0.")
+        cursor.execute('INSERT OR REPLACE INTO model (name, upvote, downvote) VALUES (?, 1, 0)', (model,))
+    print(f"Inserting vote record: username={uname}, model={model}, vote=1, prompt={prompt}, response={response}")
+    cursor.execute('INSERT INTO vote (username, model, vote, prompt, response) VALUES (?, ?, ?, ?, ?)',
+                   (uname, model, 1, prompt, response))
+    print("Committing upvote transaction.")
+    conn.commit()
+    print("Closing cursor after upvoting.")
+    cursor.close()
+
+def downvote_model(model, uname, prompt="", response=""):
+    print("Establishing database connection for downvoting.")
+    conn = get_db()
+    cursor = conn.cursor()
+    print(f"Updating downvote count for model: {model}")
+    cursor.execute('UPDATE model SET downvote = downvote + 1 WHERE name = ?', (model,))
+    if cursor.rowcount == 0:
+        print(f"No existing entry found for model '{model}'. Inserting new model with upvote=0 and downvote=1.")
+        cursor.execute('INSERT OR REPLACE INTO model (name, upvote, downvote) VALUES (?, 0, 1)', (model,))
+    print(f"Inserting vote record: username={uname}, model={model}, vote=-1, prompt={prompt}, response={response}")
+    cursor.execute('INSERT INTO vote (username, model, vote, prompt, response) VALUES (?, ?, ?, ?, ?)',
+                   (uname, model, -1, prompt, response))
+    print("Committing downvote transaction.")
+    conn.commit()
+    print("Closing cursor after downvoting.")
+    cursor.close()
+
+# Battle Mode
+
+def a_is_better(model1, model2, userid, prompt="", response1="", response2=""):
+    print("Processing vote: A is better.")
+    print(f"Comparing models: {model1} vs {model2}")
+    if model1 not in AVAILABLE_MODELS.keys() and model1 not in AVAILABLE_MODELS.values():
+        print(f"Model '{model1}' is not available. Raising error.")
+        raise gr.Error('Sorry, please try voting again.')
+    userid = mkuuid(userid)
+    print(f"Generated UUID for user: {userid}")
+    if model1 and model2:
+        print("Establishing database connection for voting.")
+        conn = get_db()
+        cursor = conn.cursor()
+        print(f"Inserting votelog: username={userid}, chosen={model1}, rejected={model2}, prompt={prompt}, chosen_response={response1}, rejected_response={response2}")
+        cursor.execute('INSERT INTO votelog (username, chosen, rejected, prompt, chosen_response, rejected_response) VALUES (?, ?, ?, ?, ?, ?)',
+                       (str(userid), model1, model2, prompt, response1, response2))
+        if scheduler:
+            print("Scheduler detected. Acquiring scheduler lock before committing.")
+            with scheduler.lock:
+                print("Committing votelog transaction with scheduler lock.")
+                conn.commit()
+        else:
+            print("Committing votelog transaction without scheduler lock.")
+            conn.commit()
+        print("Closing cursor after logging vote.")
+        cursor.close()
+        print(f"Upvoting model: {model1}")
+        upvote_model(model1, str(userid), prompt, response1)
+        print(f"Downvoting model: {model2}")
+        downvote_model(model2, str(userid), prompt, response2)
+    print("Reloading UI after voting.")
+    return reload(model1, model2, userid, chose_a=True)
+
+def b_is_better(model1, model2, userid, prompt="", response1="", response2=""):
+    print("Processing vote: B is better.")
+    print(f"Comparing models: {model1} vs {model2}")
+    if model1 not in AVAILABLE_MODELS.keys() and model1 not in AVAILABLE_MODELS.values():
+        print(f"Model '{model1}' is not available. Raising error.")
+        raise gr.Error('Sorry, please try voting again.')
+    userid = mkuuid(userid)
+    print(f"Generated UUID for user: {userid}")
+    if model1 and model2:
+        print("Establishing database connection for voting.")
+        conn = get_db()
+        cursor = conn.cursor()
+        print(f"Inserting votelog: username={userid}, chosen={model2}, rejected={model1}, prompt={prompt}, chosen_response={response2}, rejected_response={response1}")
+        cursor.execute('INSERT INTO votelog (username, chosen, rejected, prompt, chosen_response, rejected_response) VALUES (?, ?, ?, ?, ?, ?)',
+                       (str(userid), model2, model1, prompt, response2, response1))
+        if scheduler:
+            print("Scheduler detected. Acquiring scheduler lock before committing.")
+            with scheduler.lock:
+                print("Committing votelog transaction with scheduler lock.")
+                conn.commit()
+        else:
+            print("Committing votelog transaction without scheduler lock.")
+            conn.commit()
+        print("Closing cursor after logging vote.")
+        cursor.close()
+        print(f"Upvoting model: {model2}")
+        upvote_model(model2, str(userid), prompt, response2)
+        print(f"Downvoting model: {model1}")
+        downvote_model(model1, str(userid), prompt, response1)
+    print("Reloading UI after voting.")
+    return reload(model1, model2, userid, chose_b=True)
+
+# Reload
+
+def reload(chosenmodel1=None, chosenmodel2=None, userid=None, chose_a=False, chose_b=False):
+    out = [
+        gr.update(interactive=False),  # a_better
+        gr.update(interactive=False),  # b_better
+        gr.update(value=f"Selected: {chosenmodel1}" if chose_a else chosenmodel1,
+                  interactive=False,
+                  visible=True),  # model1_name
+        gr.update(value=f"Selected: {chosenmodel2}" if chose_b else chosenmodel2,
+                  interactive=False,
+                  visible=True)  # model2_name
+    ]
+    return out
database.db
ADDED
Binary file (24.6 kB)
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+pandas
+gradio
+huggingface_hub
scripts/view_db.py
ADDED
@@ -0,0 +1,61 @@
+import sqlite3
+import pandas as pd
+from tabulate import tabulate
+
+def view_db_content():
+    conn = sqlite3.connect("database.db")
+
+    # Get models and their votes
+    print("\n=== Models and Vote Counts ===")
+    models_df = pd.read_sql_query("""
+        SELECT
+            name,
+            upvote,
+            downvote,
+            CAST(upvote AS FLOAT) / NULLIF(upvote + downvote, 0) * 100 as win_rate,
+            upvote + downvote as total_votes
+        FROM model
+        ORDER BY win_rate DESC
+    """, conn)
+    print(tabulate(models_df, headers='keys', tablefmt='psql', showindex=False))
+
+    # Get recent votes with response previews
+    print("\n=== Recent Votes ===")
+    votes_df = pd.read_sql_query("""
+        SELECT
+            username,
+            model,
+            CASE
+                WHEN vote = 1 THEN 'upvote'
+                ELSE 'downvote'
+            END as vote_type,
+            substr(prompt, 1, 50) || '...' as prompt_preview,
+            substr(response, 1, 50) || '...' as response_preview,
+            datetime(timestamp, 'localtime') as local_time
+        FROM vote
+        ORDER BY timestamp DESC
+        LIMIT 10
+    """, conn)
+    print(tabulate(votes_df, headers='keys', tablefmt='psql', showindex=False))
+
+    # Get recent battles with response previews
+    print("\n=== Recent Battles ===")
+    battles_df = pd.read_sql_query("""
+        SELECT
+            username,
+            chosen as winner,
+            rejected as loser,
+            substr(prompt, 1, 50) || '...' as prompt_preview,
+            substr(chosen_response, 1, 50) || '...' as winner_response,
+            substr(rejected_response, 1, 50) || '...' as loser_response,
+            datetime(timestamp, 'localtime') as local_time
+        FROM votelog
+        ORDER BY timestamp DESC
+        LIMIT 10
+    """, conn)
+    print(tabulate(battles_df, headers='keys', tablefmt='psql', showindex=False))
+
+    conn.close()
+
+if __name__ == "__main__":
+    view_db_content()
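
Note that scripts/view_db.py depends on tabulate, which requirements.txt does not list (presumably because the script is meant for local inspection rather than the Space runtime). Assuming a local checkout:

    # pip install pandas tabulate
    # python scripts/view_db.py
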