import gradio as gr
import random
from datasets import load_dataset
import json
import os
import uuid
from huggingface_hub import HfApi
import time
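
# Gradio demo for reviewing commonsense QA candidates: it samples a candidate
# question, lets an annotator answer it, checks the answer, and collects
# free-form feedback (reason, revision, quality rating) that is uploaded as a
# JSON file to the commonsense-index-dev/DemoFeedback dataset on the Hub.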
# # Sample dataset with unique 10-digit IDs
# qa_dataset = {
#     "1234567890": {
#         "question": "What is the capital of France?",
#         "choices": ["A. Berlin", "B. Madrid", "C. Paris", "D. Lisbon"],
#         "answer": "C. Paris"
#     },
#     "0987654321": {
#         "question": "What is the largest planet in our solar system?",
#         "choices": ["A. Earth", "B. Jupiter", "C. Saturn", "D. Mars", "E. Venus"],
#         "answer": "B. Jupiter"
#     },
#     # Add more questions with unique IDs as needed
# }

truth_data = load_dataset("commonsense-index-dev/commonsense-candidates", "iter7-0520", split="train")

LAST_LOG_UPDATE = time.time()
logs = None

qa_dataset = {}
for item in truth_data:
    qa_dataset[item["id"]] = {
        "question": item["task"],
        "choices": item["choices"],
        "answer": item["answer"]
    }
    if "metadata" in item:
        qa_dataset[item["id"]]["reason"] = item["metadata"].get("reasoning", "N/A")

def update_logs():
    global LAST_LOG_UPDATE
    global logs
    if logs is None or time.time() - LAST_LOG_UPDATE > 3600:
        # Refresh the feedback logs at most once per hour.
        logs = load_dataset("commonsense-index-dev/DemoFeedback", split="train")
        LAST_LOG_UPDATE = time.time()

def get_random_question(user_name="Anonymous"):
    global logs
    update_logs()
    # Count how much feedback each question has received and which questions
    # this user has already annotated.
    feedback_counts = {qid: 0 for qid in qa_dataset.keys()}
    user_seen_data = set()
    for item in logs:
        if item["question_id"] not in feedback_counts:
            continue  # feedback that refers to a question outside the current candidate set
        feedback_counts[item["question_id"]] += 1
        if item["user_name"] == user_name:
            user_seen_data.add(item["question_id"])
    # Sample a question with the least feedback; break ties randomly and prefer
    # questions this user has not seen yet (fall back to all ties otherwise).
    min_feedback = min(feedback_counts.values())
    question_ids = [k for k, v in feedback_counts.items() if v == min_feedback]
    unseen_ids = list(set(question_ids) - user_seen_data)
    question_id = random.choice(unseen_ids if unseen_ids else question_ids)
    question_data = qa_dataset[question_id]
    reasoning = question_data.get("reason", "N/A")
    return question_id, question_data["question"], question_data["choices"], reasoning

def get_question_by_id(question_id):
    if question_id in qa_dataset:
        question_data = qa_dataset[question_id]
        return question_id, question_data["question"], question_data["choices"]
    else:
        return None, "Invalid question ID", []

def check_answer(question_id, choice, reasoning):
    correct_answer = qa_dataset[question_id]["answer"]
    text = ""
    if choice[3:] == correct_answer:  # strip the "A) " button prefix before comparing
        text += "### ✅ Correct!"
        text += "\n### Reasoning: " + reasoning
    else:
        text += "### ❌ Incorrect. Try again!"
    return text

def load_question(question_id=None, user_name="Anonymous"):
    question_id, question, choices, reasoning = get_random_question(user_name)
    question = f"---\n#### QID: {question_id}\n## {question} \n---"
    choices_markdown = "\n".join(choices)
    return question_id, question, choices_markdown, \
        gr.update(value="", visible=True), reasoning, \
        gr.update(value="", visible=True), \
        gr.update(value="Submit your feedback! 🙏", interactive=True)

def show_buttons(choices_markdown):
    choices = choices_markdown.split("\n")
    visibility = [gr.update(visible=False)] * 10
    for i in range(len(choices)):
        # generate A) .. J) labels
        choices[i] = chr(65 + i) + ") " + choices[i]
        visibility[i] = gr.update(visible=True, value=choices[i])
    return visibility

def submit_feedback(question_id, user_reason, user_revision, example_quality, user_name_text):
    if question_id in ("", "N/A", None) or example_quality in ("", "N/A", None):
        # Ask the user to sample an example and rate its quality first.
        return {
            submit_button: {"interactive": True, "__type__": "update", "value": "Submit your feedback! 🙏 Please sample an example and select a choice!"},
        }
    # Create a JSON record and upload it to the feedback dataset on the Hub.
    if user_name_text == "":
        user_name_text = "Anonymous"
    feedback_item = {
        "question_id": question_id,
        "user_name": user_name_text,
        "user_reason": user_reason,
        "revision": user_revision,
        "example_quality": example_quality,
    }
    jsonl_str = json.dumps(feedback_item)
    api = HfApi()
    token = os.getenv("HF_TOKEN")
    if token is None:
        raise ValueError("Hugging Face token not found. Ensure the HF_TOKEN environment variable is set.")
    # Generate a random filename using UUID.
    filename = f"{uuid.uuid4()}.json"
    # Define the repository.
    repo_id = "commonsense-index-dev/DemoFeedback"
    # Upload the JSON string as a file directly to the dataset repository.
    api.upload_file(
        token=token,
        repo_id=repo_id,
        repo_type="dataset",
        path_or_fileobj=jsonl_str.encode("utf-8"),  # convert string to bytes
        path_in_repo=filename,
        commit_message=f"{user_name_text}'s feedback on {question_id}",
    )
    return {
        submit_button: {"interactive": False, "__type__": "update", "value": "Submitted! ✅\n Please sample the next one."}
    }

def refresh_feedback(question_id):
    # Clear the feedback fields whenever a new question is loaded.
    return gr.update(value="", visible=True), gr.update(value="", visible=True), gr.update(value="", visible=True)
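
# --- Gradio UI: layout and event wiring ---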
with gr.Blocks() as app:
    gr.Markdown("# Commonsense Index Data Viewer")
    with gr.Row():
        # question_id_input = gr.Textbox(label="Enter Question ID", placeholder="leave empty for random sampling")
        random_button = gr.Button("🎲 Click here to randomly sample an example")
    question_display = gr.Markdown(visible=True)
    choices_markdown = gr.Markdown(visible=False)
    choice_buttons = [gr.Button(visible=False) for _ in range(10)]
    result_display = gr.Markdown(visible=True)
    reasoning_display = gr.Markdown(visible=False)
    question_id = gr.Textbox(label="Question ID:", interactive=False, visible=False)
    with gr.Row():
        with gr.Column(scale=2):
            reason_textbox = gr.Textbox(label="Reason", placeholder="Please explain why the correct answer is correct and why the other options are wrong. If you think this is a bad example, please explain that too.", type="text", max_lines=5, lines=3, show_copy_button=False, visible=True, scale=4, interactive=True)
            revision_textbox = gr.Textbox(label="Revision", placeholder="Please suggest a revision to the question.", type="text", max_lines=5, lines=3, show_copy_button=False, visible=True, scale=4, interactive=True)
        with gr.Column():
            example_quality = gr.Radio(label="Quality", choices=["Good", "Bad"], interactive=True, visible=True)
            user_name = gr.Textbox(label="Your username", placeholder="Your username", type="text", max_lines=1, show_copy_button=False, visible=True, interactive=True, show_label=False)
            submit_button = gr.Button("Submit your feedback! 🙏", elem_classes="btn_boderline", visible=True, interactive=True)

    random_button.click(fn=load_question, inputs=[user_name], outputs=[question_id, question_display, choices_markdown, result_display, reasoning_display, example_quality, submit_button])
    choices_markdown.change(fn=show_buttons, inputs=choices_markdown, outputs=choice_buttons)
    question_id.change(fn=refresh_feedback, inputs=[question_id], outputs=[reason_textbox, revision_textbox, example_quality])
    submit_button.click(fn=submit_feedback, inputs=[question_id, reason_textbox, revision_textbox, example_quality, user_name], outputs=[submit_button])

    for i, button in enumerate(choice_buttons):
        button.click(fn=check_answer, inputs=[question_id, button, reasoning_display], outputs=result_display)

app.launch()