import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import numpy as np
import streamlit as st

from models import chat_with_model, embed
from prompts import create_gen_prompt, create_judge_prompt


def generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key):
    """Generates an answer to a question using the specified language model."""
    gen_prompt = create_gen_prompt(question, previous_answers)
    try:
        new_answer = chat_with_model(prompt=gen_prompt, model=model_name, open_router_key=open_router_key,
                                     openai_api_key=openai_api_key)
        return new_answer
    except Exception as e:
        st.write(f"<span style='color:red'>Error generating answer: {str(e)}</span>",
                 unsafe_allow_html=True)
        return None


def evaluate_answer(question, new_answer, open_router_key, openai_api_key):
    """Evaluates the coherence of an answer using a judge model."""
    judge_prompt = create_judge_prompt(question, new_answer)
    judge = "openai/gpt-4o-mini"
    try:
        judge_response = chat_with_model(prompt=judge_prompt, model=judge, open_router_key=open_router_key,
                                         openai_api_key=openai_api_key)
        coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])
        return coherence_score
    except Exception as e:
        st.write(f"<span style='color:red'>Error getting judge response: {str(e)}</span>",
                 unsafe_allow_html=True)
        return None


def process_question(question, model_name, open_router_key, openai_api_key, progress_lock, completed_questions, total_questions, progress):
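    """Repeatedly generates answers to `question`, scoring each for coherence and
    novelty, until an answer is judged incoherent or redundant; returns the total
    novelty score and a result record for the question."""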
    start_time = time.time()
    st.write(f"<span style='color:red'>{question}</span>", unsafe_allow_html=True)
    previous_answers = []
    question_novelty = 0
    coherence_score = None  # Keep defined even if the first generation fails
    try:
        while True:
            new_answer = generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key)
            if new_answer is None:
                break

            coherence_score = evaluate_answer(question, new_answer, open_router_key, openai_api_key)
            if coherence_score is None:
                break
            if coherence_score <= 3:
                st.write("<span style='color:yellow'>Output is incoherent. Moving to next question.</span>",
                         unsafe_allow_html=True)
                break

            novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
            if novelty_score < 0.1:
                st.write("<span style='color:yellow'>Output is redundant. Moving to next question.</span>",
                         unsafe_allow_html=True)
                break

            st.write(f"**New Answer:**\n{new_answer}")
            st.write(f"<span style='color:green'>Coherence Score: {coherence_score}</span>",
                     unsafe_allow_html=True)
            st.write(f"**Novelty Score:** {novelty_score}")

            previous_answers.append(new_answer)
            question_novelty += novelty_score
    except Exception as e:
        st.write(f"<span style='color:red'>Unexpected error processing question: {str(e)}</span>",
                 unsafe_allow_html=True)

    time_taken = time.time() - start_time
    st.write(f"<span style='color:blue'>Total novelty score for this question: {question_novelty}</span>",
             unsafe_allow_html=True)
    st.write(f"<span style='color:blue'>Time taken: {time_taken:.2f} seconds</span>",
             unsafe_allow_html=True)

    # Update progress (ints are passed by value, so this only updates the local copies)
    if progress_lock is not None:
        with progress_lock:
            completed_questions += 1
            progress = completed_questions / total_questions

    return question_novelty, [
        {
            "question": question,
            "answers": previous_answers,
            "coherence_score": coherence_score,
            "novelty_score": question_novelty
        }
    ]


def get_novelty_score(new_answer: str, previous_answers: list, openai_api_key):
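    """Returns 1 minus the maximum cosine similarity between the new answer's
    embedding and the embeddings of the previous answers (1.0 if there are none)."""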
    new_embedding = embed(new_answer, openai_api_key)

    # If there are no previous answers, return maximum novelty
    if not previous_answers:
        return 1.0

    previous_embeddings = [embed(answer, openai_api_key) for answer in previous_answers]

    similarities = [
        np.dot(new_embedding, prev_embedding) /
        (np.linalg.norm(new_embedding) * np.linalg.norm(prev_embedding))
        for prev_embedding in previous_embeddings
    ]

    max_similarity = max(similarities)
    novelty = 1 - max_similarity
    return novelty


def benchmark_model_multithreaded(model_name, questions, open_router_key, openai_api_key, max_threads=None, progress=0, progress_lock=None):
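    """Runs process_question concurrently across a thread pool and aggregates the
    per-question novelty scores and result records."""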
    novelty_score = 0
    print_lock = threading.Lock()  # Lock for thread-safe printing
    results = []
    completed_questions = 0  # Shared variable to track progress

    # Use max_threads if provided, otherwise default to the number of questions
    if max_threads is None:
        max_workers = len(questions)
    else:
        max_workers = max_threads

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_question = {
            executor.submit(process_question, question, model_name, open_router_key, openai_api_key,
                            progress_lock, completed_questions, len(questions), progress): question
            for question in questions
        }

        for future in as_completed(future_to_question):
            question = future_to_question[future]
            try:
                question_novelty, question_results = future.result()
                with print_lock:
                    novelty_score += question_novelty
                    results.extend(question_results)
                    st.write(
                        f"<span style='color:yellow'>Total novelty score across all questions (so far): {novelty_score}</span>",
                        unsafe_allow_html=True)
            except Exception as e:
                with print_lock:
                    st.write(f"<span style='color:red'>Error in thread: {str(e)}</span>", unsafe_allow_html=True)

    st.write(f"<span style='color:yellow'>Final total novelty score across all questions: {novelty_score}</span>",
             unsafe_allow_html=True)
    return results


def benchmark_model_sequential(model_name, questions, open_router_key, openai_api_key, progress=0, progress_lock=None):
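    """Runs process_question for each question one at a time and aggregates the
    per-question novelty scores and result records."""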
    novelty_score = 0
    results = []

    for i, question in enumerate(questions):
        question_novelty, question_results = process_question(question, model_name, open_router_key, openai_api_key,
                                                              progress_lock, i, len(questions), progress)
        novelty_score += question_novelty
        results.extend(question_results)
        st.write(
            f"<span style='color:yellow'>Total novelty score across processed questions: {novelty_score}</span>",
            unsafe_allow_html=True)  # Display progress after each question

    st.write(f"<span style='color:yellow'>Final total novelty score across all questions: {novelty_score}</span>",
             unsafe_allow_html=True)
    return results
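
# Illustrative usage only: a minimal sketch of how a Streamlit front end might call the
# benchmark entry points above. The question text, model choice, and thread count here
# are placeholder assumptions, not values defined in this module.
#
#     questions = ["Example benchmark question goes here."]
#     results = benchmark_model_multithreaded(
#         model_name="openai/gpt-4o-mini",
#         questions=questions,
#         open_router_key=open_router_key,
#         openai_api_key=openai_api_key,
#         max_threads=4,
#         progress_lock=threading.Lock(),
#     )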