poemsforaphrodite committed
Commit 05e591f
1 Parent(s): 80476ab

Update app.py

Files changed (1): app.py +521 -122

app.py CHANGED
@@ -14,7 +14,7 @@ from pinecone import Pinecone, ServerlessSpec
  import threading # {{ edit_25: Import threading for background processing }}
  import tiktoken
  from tiktoken.core import Encoding
- from runner import run_model
  from bson.objectid import ObjectId
  import traceback # Add this import at the top of your file
  import umap
@@ -22,6 +22,39 @@ import plotly.graph_objs as go
  from sklearn.preprocessing import StandardScaler
  from sklearn.cluster import KMeans
  import plotly.colors as plc

  # Add this helper function at the beginning of your file
  def extract_prompt_text(prompt):
@@ -81,8 +114,6 @@ def signup(username, password):
  "models": [] # List to store user's models
  })
  return True
- def upload_model(file):
- return "Model uploaded successfully!"

  # Function to perform evaluation (placeholder)
  def evaluate_model(model_identifier, metrics, username):
@@ -151,10 +182,9 @@ def generate_embedding(text):
  try:
  embedding_response = openai_client.embeddings.create(
  model="text-embedding-3-large", # {{ edit_3: Use the specified embedding model }}
- input=text,
- encoding_format="float"
  )
- embedding = embedding_response["data"][0]["embedding"]
  return embedding
  except Exception as e:
  st.error(f"Error generating embedding: {str(e)}")
@@ -215,6 +245,7 @@ def index_context_data(model_name, texts):
  ])
  except Exception as e:
  st.error(f"Error indexing data to Pinecone: {str(e)}")
  def upload_model(file, username, model_type):
  # {{ edit_5: Modify upload_model to handle model_type }}
  model_id = f"{username}_model_{int(datetime.now().timestamp())}"
@@ -251,7 +282,56 @@ def upload_model(file, username, model_type):
  return f"Named Model {model_id} registered successfully!"
  else:
  return "Invalid model type specified."

  # Function to save results to MongoDB
  def save_results(username, model, prompt, context, response, evaluation): # {{ edit_29: Add 'username' parameter }}
  result = {
@@ -267,6 +347,87 @@ def save_results(username, model, prompt, context, response, evaluation): # {{
  }
  results_collection.insert_one(result)

  # Modify the run_custom_evaluations function
  def run_custom_evaluations(data, selected_model, username):
  try:
@@ -278,12 +439,16 @@ def run_custom_evaluations(data, selected_model, username):
  # For simple models, data is already in the correct format
  test_cases = data
  else:
- # For other models, data is split into context_dataset and questions
  context_dataset, questions = data
  test_cases = [
  {
  "prompt": extract_prompt_text(question),
- "context": context_dataset,
  "response": "" # This will be filled by the model
  }
  for question in questions
@@ -291,11 +456,18 @@ def run_custom_evaluations(data, selected_model, username):

  for test_case in test_cases:
  prompt_text = test_case["prompt"]
  context = test_case["context"]

  # Get the student model's response using runner.py
  try:
- answer = run_model(model_name, prompt_text)
  if answer is None or answer == "":
  st.warning(f"No response received from the model for prompt: {prompt_text}")
  answer = "No response received from the model."
@@ -421,17 +593,43 @@ if not st.session_state.user:
  st.sidebar.error("Username already exists")
  else:
  st.sidebar.success(f"Welcome, {st.session_state.user}!")
  if st.sidebar.button("Logout"):
  st.session_state.user = None
  st.rerun()

-
-
  # App content
  if st.session_state.user:
- app_mode = st.sidebar.selectbox("Choose the section", ["Dashboard", "Model Upload", "Evaluation", "Prompt Testing", "Manage Models", "History"]) # {{ edit_add: Added "History" to the sidebar navigation }}

- if app_mode == "Dashboard":
  st.title("Dashboard")
  st.write("### Real-time Metrics and Performance Insights")
@@ -844,7 +1042,7 @@ if st.session_state.user:
  - **Model Performance**: Analyze clusters to identify strengths and weaknesses of models.
  - **Data Patterns**: Use clustering to uncover hidden patterns in your evaluation data.

- **Tips:**

  - Experiment with different numbers of clusters to find meaningful groupings.
  - Adjust UMAP parameters to see how the clustering changes with different embeddings.
@@ -961,7 +1159,7 @@ if st.session_state.user:
  st.error(traceback.format_exc())
  st.stop()

- elif app_mode == "Model Upload":
  st.title("Upload Your Model")
  model_type = st.radio("Select Model Type", ["Custom", "Named"]) # {{ edit_6: Select model type }}
  uploaded_file = st.file_uploader("Choose a model file", type=[".pt", ".h5", ".bin"]) if model_type == "custom" else None
@@ -976,7 +1174,7 @@ if st.session_state.user:
  else:
  st.error("Please upload a valid model file for Custom models.")

- elif app_mode == "Evaluation":
  st.title("Evaluate Your Model")
  st.write("### Select Model and Evaluation Metrics")
@@ -1015,108 +1213,290 @@ if st.session_state.user:
  else:
  st.error("Selected model not found.")

- elif app_mode == "Prompt Testing":
  st.title("Prompt Testing")

- model_selection_option = st.radio("Select Model Option:", ["Choose Existing Model", "Add New Model"])

- if model_selection_option == "Choose Existing Model":
- user = users_collection.find_one({"username": st.session_state.user})
- user_models = user.get("models", [])
-
- if not user_models:
- st.error("You have no uploaded models. Please upload a model first.")
- else:
- model_options = [
- f"{model['model_name']} ({model.get('model_type', 'Unknown').capitalize()})"
- for model in user_models
- ]
- selected_model = st.selectbox("Select a Model for Testing", model_options)
-
- model_name = selected_model.split(" (")[0]
- model_type = selected_model.split(" (")[1].rstrip(")")
  else:
- # Code for adding a new model (unchanged)
- ...
-
- st.subheader("Input for Model Testing")
-
- # For simple models, we'll use a single JSON file
- if model_type.lower() == "simple":
- st.write("For simple models, please upload a single JSON file containing prompts, contexts, and responses.")
- json_file = st.file_uploader("Upload Test Data JSON", type=["json"])

- if json_file is not None:
- try:
- test_data = json.load(json_file)
- st.success("Test data JSON file uploaded successfully!")
-
- # Display a preview of the test data
- st.write("Preview of test data:")
- st.json(test_data[:3] if len(test_data) > 3 else test_data)

- except json.JSONDecodeError:
- st.error("Invalid JSON format. Please check your file.")
- else:
- test_data = None
- else:
- # For other model types, keep the existing separate inputs for context and questions
- context_input_method = st.radio("Choose context input method:", ["Text Input", "File Upload"])
- if context_input_method == "Text Input":
- context_dataset = st.text_area("Enter Context Dataset (txt):", height=200)
- else:
- context_file = st.file_uploader("Upload Context Dataset", type=["txt"])
- if context_file is not None:
- context_dataset = context_file.getvalue().decode("utf-8")
- st.success("Context file uploaded successfully!")
- else:
- context_dataset = None

- questions_input_method = st.radio("Choose questions input method:", ["Text Input", "File Upload"])
- if questions_input_method == "Text Input":
- questions_json = st.text_area("Enter Questions (JSON format):", height=200)
- else:
- questions_file = st.file_uploader("Upload Questions JSON", type=["json"])
- if questions_file is not None:
- questions_json = questions_file.getvalue().decode("utf-8")
- st.success("Questions file uploaded successfully!")
- else:
- questions_json = None
-
- if st.button("Run Test"):
- if not model_name:
- st.error("Please select or add a valid Model.")
- elif model_type.lower() == "simple" and test_data is None:
- st.error("Please upload a valid test data JSON file.")
- elif model_type.lower() != "simple" and (not context_dataset or not questions_json):
- st.error("Please provide both context dataset and questions JSON.")
- else:
- try:
- selected_model = next(
- (m for m in user_models if m['model_name'] == model_name),
- None
- )
- if selected_model:
- with st.spinner("Starting evaluations..."):
- if model_type.lower() == "simple":
- evaluation_thread = threading.Thread(
- target=run_custom_evaluations,
- args=(test_data, selected_model, st.session_state.user)
- )
  else:
- questions = json.loads(questions_json)
- evaluation_thread = threading.Thread(
- target=run_custom_evaluations,
- args=((context_dataset, questions), selected_model, st.session_state.user)
- )
- evaluation_thread.start()
- st.success("Evaluations are running in the background. You can navigate away or close the site.")
  else:
- st.error("Selected model not found.")
- except json.JSONDecodeError:
- st.error("Invalid JSON format. Please check your input.")

- elif app_mode == "Manage Models":
  st.title("Manage Your Models")
  # Fetch the user from the database
  user = users_collection.find_one({"username": st.session_state.user})
@@ -1135,17 +1515,17 @@ if st.session_state.user:
  )

  st.subheader("Add a New Model")
- model_type = st.radio("Select Model Type:", ["Simple Model", "Custom Model"])

  if model_type == "Simple Model":
  new_model_name = st.text_input("Enter New Model Name:")
- if st.button("Add Simple Model") or st.button("Add Custom Model"):
- if new_model_name or selected_custom_model:
  model_id = f"{st.session_state.user}_model_{int(datetime.now().timestamp())}"
  model_data = {
  "model_id": model_id,
- "model_name": new_model_name if model_type == "Simple Model" else selected_custom_model,
- "model_type": "simple" if model_type == "Simple Model" else "custom",
  "file_path": None,
  "model_link": None,
  "uploaded_at": datetime.now(),
@@ -1155,11 +1535,11 @@ if st.session_state.user:
  {"username": st.session_state.user},
  {"$push": {"models": model_data}}
  )
- st.success(f"Model '{model_data['model_name']}' added successfully as {model_id}!")
  else:
- st.error("Please enter a valid model name or select a custom model.")

- else: # Custom Model
  custom_model_options = ["gpt-4o", "gpt-4o-mini"]
  selected_custom_model = st.selectbox("Select Custom Model:", custom_model_options)
@@ -1177,6 +1557,28 @@ if st.session_state.user:
  }}}
  )
  st.success(f"Custom Model '{selected_custom_model}' added successfully as {model_id}!")

  st.markdown("---")
@@ -1202,10 +1604,11 @@ if st.session_state.user:
  {"$pull": {"models": {"model_id": model['model_id']}}}
  )
  st.success(f"Model {model['model_id']} deleted successfully!")
  else:
  st.info("You have no uploaded models.")

- elif app_mode == "History": # {{ edit_add: Enhanced History UI }}
  st.title("History")
  st.write("### Your Evaluation History")
@@ -1285,8 +1688,4 @@ if st.session_state.user:
  st.info("You have no evaluation history yet.")

  except Exception as e:
- st.error(f"Error fetching history data: {e}")
-
- # Add a footer
- st.sidebar.markdown("---")
- st.sidebar.info("LLM Evaluation System - v0.2")
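app.py AFTER CHANGE (the same hunks, showing the updated lines; additions marked +)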
  import threading # {{ edit_25: Import threading for background processing }}
  import tiktoken
  from tiktoken.core import Encoding
+ from runner import run_model, summarize_image # {{ edit_add: Import necessary functions }}
  from bson.objectid import ObjectId
  import traceback # Add this import at the top of your file
  import umap

  from sklearn.preprocessing import StandardScaler
  from sklearn.cluster import KMeans
  import plotly.colors as plc
+ import uuid
+ import time # Add this import at the top of your file
+ from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration, AudioProcessorBase
+ import av
+ import io
+ from typing import List
+ import requests
+ import traceback
+ # Add these imports at the beginning of your file
+ from pydub import AudioSegment
+
+ # Add this import at the top of your file
+ import tempfile
+
+ # Add this helper function for audio recording
+ def process_audio(frame):
+ sound = frame.to_ndarray()
+ sound = sound.astype(np.int16)
+ return av.AudioFrame.from_ndarray(sound, layout="mono")
+
+ # Add this helper function to convert WebRTC audio to a file
+ def webrtc_audio_to_file(audio_frames):
+ audio = AudioSegment.empty()
+ for frame in audio_frames:
+ audio += AudioSegment(
+ data=frame.to_ndarray().tobytes(),
+ sample_width=frame.format.bytes,
+ frame_rate=frame.sample_rate,
+ channels=1
+ )
+ buffer = io.BytesIO()
+ audio.export(buffer, format="wav")
+ return buffer.getvalue()

  # Add this helper function at the beginning of your file
  def extract_prompt_text(prompt):
 
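For orientation, the two helpers above are the kind that get driven by a streamlit_webrtc widget, which delivers audio as av.AudioFrame objects through the streamer context. A minimal, hypothetical wiring sketch — the widget key, mode, and receiver size below are assumptions, not part of this commit:

    # Hypothetical usage of webrtc_audio_to_file (not in this commit).
    ctx = webrtc_streamer(
        key="audio-recorder",  # assumed widget key
        mode=WebRtcMode.SENDONLY,
        audio_receiver_size=256,
        media_stream_constraints={"video": False, "audio": True},
    )
    if ctx.audio_receiver:
        frames = ctx.audio_receiver.get_frames(timeout=1)  # list of av.AudioFrame
        wav_bytes = webrtc_audio_to_file(frames)  # WAV bytes, usable with transcribe_audio defined later in this diff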
  "models": [] # List to store user's models
  })
  return True

  # Function to perform evaluation (placeholder)
  def evaluate_model(model_identifier, metrics, username):
 
  try:
  embedding_response = openai_client.embeddings.create(
  model="text-embedding-3-large", # {{ edit_3: Use the specified embedding model }}
+ input=text
  )
+ embedding = embedding_response.data[0].embedding
  return embedding
  except Exception as e:
  st.error(f"Error generating embedding: {str(e)}")
 
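The hunk above drops the dict-style lookup in favor of the attribute access used by the v1 OpenAI Python SDK. A self-contained sketch of the same call pattern (error handling trimmed; assumes OPENAI_API_KEY is set in the environment):

    from openai import OpenAI

    client = OpenAI()

    def embed(text):
        response = client.embeddings.create(
            model="text-embedding-3-large",
            input=text,
        )
        # v1 SDK responses are objects, not dicts: .data[0].embedding
        return response.data[0].embedding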
  ])
  except Exception as e:
  st.error(f"Error indexing data to Pinecone: {str(e)}")
+
  def upload_model(file, username, model_type):
  # {{ edit_5: Modify upload_model to handle model_type }}
  model_id = f"{username}_model_{int(datetime.now().timestamp())}"

  return f"Named Model {model_id} registered successfully!"
  else:
  return "Invalid model type specified."
+ # {{ edit_30: Display uploaded models in the UI after uploading }}
+ st.write("### Uploaded Models")
+ user = users_collection.find_one({"username": username})
+ user_models = user.get("models", [])
+ for model in user_models:
+ st.write(f"- **{model['model_name']}** (ID: {model['model_id']})")
+
+ def run_huggingface_evaluations(data, selected_model, username):
+ try:
+ model_name = selected_model['model_name']
+ model_id = selected_model['model_id']
+ api_endpoint = selected_model.get('model_link')
+ api_token = selected_model.get('model_api_token')
+
+ if not api_endpoint or not api_token:
+ st.error("API endpoint or token is missing for the selected Hugging Face model.")
+ return

+ headers = {
+ "Authorization": f"Bearer {api_token}",
+ "Content-Type": "application/json"
+ }
+
+ for test_case in data:
+ prompt = test_case.get("prompt", "")
+ context = test_case.get("context", "")
+
+ # Prepare the payload for the Hugging Face API
+ payload = {
+ "inputs": f"Context: {context}\n\nPrompt: {prompt}"
+ }
+
+ # Make the API call to the Hugging Face model
+ response = requests.post(api_endpoint, headers=headers, json=payload)
+
+ if response.status_code == 200:
+ model_output = response.json()[0]['generated_text']
+
+ # Get the teacher's evaluation
+ evaluation = teacher_evaluate(prompt, context, model_output)
+
+ # Save the results
+ save_results(username, selected_model, prompt, context, model_output, evaluation)
+ else:
+ st.error(f"Error calling Hugging Face API: {response.status_code} - {response.text}")
+
+ st.success("Hugging Face model evaluation completed successfully!")
+ except Exception as e:
+ st.error(f"Error in Hugging Face evaluation: {str(e)}")
+ st.error(f"Detailed error: {traceback.format_exc()}")
  # Function to save results to MongoDB
  def save_results(username, model, prompt, context, response, evaluation): # {{ edit_29: Add 'username' parameter }}
  result = {
 
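run_huggingface_evaluations follows the standard Hugging Face Inference API pattern: POST a JSON payload with an "inputs" field, authenticated with a bearer token. The request/response round trip in isolation (endpoint and token are placeholders):

    import requests

    API_URL = "https://api-inference.huggingface.co/models/<model-id>"  # placeholder
    HEADERS = {"Authorization": "Bearer <hf_api_token>"}                # placeholder

    def query_hf(prompt, context):
        payload = {"inputs": f"Context: {context}\n\nPrompt: {prompt}"}
        response = requests.post(API_URL, headers=HEADERS, json=payload)
        response.raise_for_status()
        # Text-generation endpoints typically return [{"generated_text": "..."}]
        return response.json()[0]["generated_text"]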
  }
  results_collection.insert_one(result)

+ # Function to chunk text
+ def chunk_text(text, max_tokens=500):
+ tokens = tokenizer.encode(text)
+ chunks = []
+ current_chunk = []
+ current_length = 0
+
+ for token in tokens:
+ if current_length + 1 > max_tokens:
+ chunks.append(tokenizer.decode(current_chunk))
+ current_chunk = []
+ current_length = 0
+ current_chunk.append(token)
+ current_length += 1
+
+ if current_chunk:
+ chunks.append(tokenizer.decode(current_chunk))
+
+ return chunks
+
+ # Function to upload context to Pinecone
+ def upload_context_to_pinecone(context, username, model_name):
+ chunks = chunk_text(context)
+ index = pinecone_client.Index(os.getenv('PINECONE_INDEX_NAME'))
+
+ namespace = f"{username}_{model_name}" # Create a unique namespace for each user-model combination
+
+ for chunk in chunks:
+ embedding = generate_embedding(chunk)
+ if embedding:
+ index.upsert([
+ {
+ "id": str(uuid.uuid4()),
+ "values": embedding,
+ "metadata": {"text": chunk}
+ }
+ ], namespace=namespace) # Use the namespace when upserting
+
+ # Function to retrieve relevant context from Pinecone
+ def retrieve_context_from_pinecone(prompt, username, model_name):
+ index = pinecone_client.Index(os.getenv('PINECONE_INDEX_NAME'))
+ prompt_embedding = generate_embedding(prompt)
+
+ namespace = f"{username}_{model_name}" # Use the same namespace format for retrieval
+
+ if prompt_embedding:
+ results = index.query(
+ vector=prompt_embedding,
+ top_k=5,
+ namespace=namespace, # Use the namespace when querying
+ include_metadata=True
+ )
+
+ retrieved_context = " ".join([result.metadata['text'] for result in results.matches])
+ return retrieved_context
+
+ return ""
+
+ def transcribe_audio(audio_file):
+ try:
+ # Save the uploaded file to a temporary file
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
+ temp_audio.write(audio_file.read())
+ temp_audio_path = temp_audio.name
+
+ # Transcribe the audio using OpenAI's Whisper model
+ with open(temp_audio_path, "rb") as audio_file:
+ transcript = openai_client.audio.transcriptions.create(
+ model="whisper-1",
+ file=audio_file,
+ response_format="text"
+ )
+
+ # Remove the temporary file
+ os.unlink(temp_audio_path)
+
+ return transcript
+ except Exception as e:
+ st.error(f"Error transcribing audio: {str(e)}")
+ return None
+
  # Modify the run_custom_evaluations function
  def run_custom_evaluations(data, selected_model, username):
  try:
 
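chunk_text, upload_context_to_pinecone, and retrieve_context_from_pinecone together form a small per-user RAG loop: token-chunk the context, upsert each chunk's embedding under a "{username}_{model_name}" namespace, then query that same namespace at test time. A condensed round trip using the helpers above (assumes the module-level tokenizer, pinecone_client, and generate_embedding defined elsewhere in app.py; the namespace and texts are placeholders):

    import os
    import uuid

    index = pinecone_client.Index(os.getenv("PINECONE_INDEX_NAME"))
    namespace = "alice_my-model"  # i.e. f"{username}_{model_name}"

    # Index a context document chunk by chunk.
    for chunk in chunk_text("...long context text...", max_tokens=500):
        index.upsert(
            [{"id": str(uuid.uuid4()),
              "values": generate_embedding(chunk),
              "metadata": {"text": chunk}}],
            namespace=namespace,
        )

    # Retrieve the five nearest chunks for a prompt.
    results = index.query(
        vector=generate_embedding("placeholder question"),
        top_k=5,
        namespace=namespace,
        include_metadata=True,
    )
    retrieved = " ".join(m.metadata["text"] for m in results.matches)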
  # For simple models, data is already in the correct format
  test_cases = data
  else:
+ # For custom models, data is split into context_dataset and questions
  context_dataset, questions = data
+
+ # Upload context to Pinecone with user and model-specific namespace
+ upload_context_to_pinecone(context_dataset, username, model_name)
+
  test_cases = [
  {
  "prompt": extract_prompt_text(question),
+ "context": "", # This will be filled with retrieved context
  "response": "" # This will be filled by the model
  }
  for question in questions

  for test_case in test_cases:
  prompt_text = test_case["prompt"]
+
+ # For custom models, retrieve context from Pinecone using the user and model-specific namespace
+ if model_type != 'simple':
+ retrieved_context = retrieve_context_from_pinecone(prompt_text, username, model_name)
+ test_case["context"] = retrieved_context
+
  context = test_case["context"]

  # Get the student model's response using runner.py
  try:
+ # Pass both prompt and context to run_model
+ answer = run_model(model_name, prompt_text, context)
  if answer is None or answer == "":
  st.warning(f"No response received from the model for prompt: {prompt_text}")
  answer = "No response received from the model."
 
  st.sidebar.error("Username already exists")
  else:
  st.sidebar.success(f"Welcome, {st.session_state.user}!")
+
+ # Separate links for each section
+ if st.sidebar.button("Dashboard"):
+ st.session_state.app_mode = "Dashboard"
+ st.rerun()
+
+ if st.sidebar.button("Model Upload"):
+ st.session_state.app_mode = "Model Upload"
+ st.rerun()
+
+ if st.sidebar.button("Evaluation"):
+ st.session_state.app_mode = "Evaluation"
+ st.rerun()
+
+ if st.sidebar.button("Prompt Testing"):
+ st.session_state.app_mode = "Prompt Testing"
+ st.rerun()
+
+ if st.sidebar.button("Manage Models"):
+ st.session_state.app_mode = "Manage Models"
+ st.rerun()
+
+ if st.sidebar.button("History"):
+ st.session_state.app_mode = "History"
+ st.rerun()
+
  if st.sidebar.button("Logout"):
  st.session_state.user = None
+ st.session_state.app_mode = None
  st.rerun()

  # App content
  if st.session_state.user:
+ if 'app_mode' not in st.session_state:
+ st.session_state.app_mode = "Dashboard"

+ if st.session_state.app_mode == "Dashboard":
  st.title("Dashboard")
  st.write("### Real-time Metrics and Performance Insights")
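The selectbox navigation is replaced here by sidebar buttons that persist the active section in st.session_state and force a rerun. The same pattern, reduced to a loop (section names as in the diff):

    import streamlit as st

    SECTIONS = ["Dashboard", "Model Upload", "Evaluation",
                "Prompt Testing", "Manage Models", "History"]

    if "app_mode" not in st.session_state:
        st.session_state.app_mode = "Dashboard"  # default section

    for section in SECTIONS:
        if st.sidebar.button(section):
            st.session_state.app_mode = section
            st.rerun()  # rerun so the body renders the newly selected section

session_state survives reruns, which is why the selection persists after st.rerun().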
  - **Model Performance**: Analyze clusters to identify strengths and weaknesses of models.
  - **Data Patterns**: Use clustering to uncover hidden patterns in your evaluation data.

+ **Tips:**

  - Experiment with different numbers of clusters to find meaningful groupings.
  - Adjust UMAP parameters to see how the clustering changes with different embeddings.
 
  st.error(traceback.format_exc())
  st.stop()

+ elif st.session_state.app_mode == "Model Upload":
  st.title("Upload Your Model")
  model_type = st.radio("Select Model Type", ["Custom", "Named"]) # {{ edit_6: Select model type }}
  uploaded_file = st.file_uploader("Choose a model file", type=[".pt", ".h5", ".bin"]) if model_type == "custom" else None

  else:
  st.error("Please upload a valid model file for Custom models.")

+ elif st.session_state.app_mode == "Evaluation":
  st.title("Evaluate Your Model")
  st.write("### Select Model and Evaluation Metrics")
 
  else:
  st.error("Selected model not found.")

+ elif st.session_state.app_mode == "Prompt Testing":
  st.title("Prompt Testing")

+ user = users_collection.find_one({"username": st.session_state.user})
+ user_models = user.get("models", [])

+ if not user_models:
+ st.error("You have no uploaded models. Please upload a model first.")
  else:
+ model_options = [
+ f"{model['model_name']} ({model.get('model_type', 'Unknown').capitalize()})"
+ for model in user_models
+ ]
+ selected_model = st.selectbox("Select a Model for Testing", model_options)

+ model_name = selected_model.split(" (")[0]
+ model_type = selected_model.split(" (")[1].rstrip(")")
+
+ st.subheader("Input for Model Testing")
+
+ if model_type.lower() == "simple":
+ input_type = st.radio("Select Input Type:", ["Text", "Audio", "Image"])
+ elif model_type.lower() == "custom":
+ input_type = "Text"
+ elif model_type.lower() == "huggingface":
+ input_type = "Text"
+
+ if input_type == "Text":
+ if model_type.lower() == "simple":
+ st.write("For simple models, please upload a single JSON file containing prompts, contexts, and responses.")
+ json_file = st.file_uploader("Upload Test Data JSON", type=["json"])

+ if json_file is not None:
+ try:
+ test_data = json.load(json_file)
+ st.success("Test data JSON file uploaded successfully!")
+
+ # Display a preview of the test data
+ st.write("Preview of test data:")
+ st.json(test_data[:3] if len(test_data) > 3 else test_data)
+
+ except json.JSONDecodeError:
+ st.error("Invalid JSON format. Please check your file.")
+ else:
+ test_data = None
+ elif model_type.lower() == "custom":
+ # For other model types, keep the existing separate inputs for context and questions
+ context_file = st.file_uploader("Upload Context Dataset", type=["txt"])
+ if context_file is not None:
+ context_dataset = context_file.getvalue().decode("utf-8")
+ st.success("Context file uploaded successfully!")
+ # Upload context to Pinecone with user and model-specific namespace
+ upload_context_to_pinecone(context_dataset, st.session_state.user, model_name)
+ else:
+ context_dataset = None

+ questions_file = st.file_uploader("Upload Questions JSON", type=["json"])
+ if questions_file is not None:
+ questions_json = questions_file.getvalue().decode("utf-8")
+ st.success("Questions file uploaded successfully!")
+ else:
+ questions_json = None
+ elif model_type.lower() == "huggingface":
+ st.write("For Hugging Face models, please enter your prompt:")
+ context_file = st.file_uploader("Upload Context Dataset", type=["txt"])
+ if context_file is not None:
+ context_dataset = context_file.getvalue().decode("utf-8")
+ st.success("Context file uploaded successfully!")
+ else:
+ context_dataset = None
+
+ questions_file = st.file_uploader("Upload Questions JSON", type=["json"])
+ if questions_file is not None:
+ questions_json = questions_file.getvalue().decode("utf-8")
+ st.success("Questions file uploaded successfully!")
+ else:
+ questions_json = None
+
+ elif input_type == "Audio":
+ st.write("Please upload audio files for Prompts, Contexts, and Responses.")
+ prompt_audio = st.file_uploader("Upload Prompt Audio", type=["mp3", "wav"])
+ context_audio = st.file_uploader("Upload Context Audio", type=["mp3", "wav"])
+ response_audio = st.file_uploader("Upload Response Audio", type=["mp3", "wav"])
+
+ if prompt_audio:
+ st.audio(prompt_audio, format='audio/wav')
+ st.write(f"**Uploaded Prompt Audio:** {prompt_audio.name}")
+ if context_audio:
+ st.audio(context_audio, format='audio/wav')
+ st.write(f"**Uploaded Context Audio:** {context_audio.name}")
+ if response_audio:
+ st.audio(response_audio, format='audio/wav')
+ st.write(f"**Uploaded Response Audio:** {response_audio.name}")
+
+ elif input_type == "Image":
+ st.write("Please upload image files for Prompt, Context, and Response.")
+ prompt_image = st.file_uploader("Upload Prompt Image", type=["png", "jpg", "jpeg"])
+ context_image = st.file_uploader("Upload Context Image", type=["png", "jpg", "jpeg"])
+ response_image = st.file_uploader("Upload Response Image", type=["png", "jpg", "jpeg"])
+
+ if prompt_image:
+ st.image(prompt_image, caption='Uploaded Prompt Image.', use_column_width=True)
+ st.write(f"**Uploaded Prompt Image:** {prompt_image.name}")
+ if context_image:
+ st.image(context_image, caption='Uploaded Context Image.', use_column_width=True)
+ st.write(f"**Uploaded Context Image:** {context_image.name}")
+ if response_image:
+ st.image(response_image, caption='Uploaded Response Image.', use_column_width=True)
+ st.write(f"**Uploaded Response Image:** {response_image.name}")
+
+ # {{ edit_final: Handle Run Test for Image input with three images }}
+ if st.button("Run Test"):
+ if not model_name:
+ st.error("Please select a valid Model.")
+ elif input_type == "Text":
+ if model_type.lower() == "simple" and test_data is None:
+ st.error("Please upload a valid test data JSON file.")
+ elif model_type.lower() != "simple" and (not context_dataset or not questions_json):
+ st.error("Please provide both context dataset and questions JSON.")
+ else:
+ try:
+ selected_model_data = next(
+ (m for m in user_models if m['model_name'] == model_name),
+ None
+ )
+ if selected_model_data:
+ with st.spinner("Starting evaluations..."):
+ if model_type.lower() == "simple":
+ run_custom_evaluations(test_data, selected_model_data, st.session_state.user)
+ st.success("Simple model evaluations are running in the background. You can navigate away or close the site.")
+ elif model_type.lower() == "custom":
+ questions = json.loads(questions_json)
+ run_custom_evaluations((context_dataset, questions), selected_model_data, st.session_state.user)
+ st.success("Custom model evaluations are running in the background. You can navigate away or close the site.")
+ elif model_type.lower() == "huggingface":
+ if not context_dataset or not questions_json:
+ st.error("Please provide both context dataset and questions JSON.")
+ else:
+ try:
+ questions = json.loads(questions_json)
+ test_data = [
+ {
+ "prompt": extract_prompt_text(question),
+ "context": context_dataset
+ }
+ for question in questions
+ ]
+ run_huggingface_evaluations(test_data, selected_model_data, st.session_state.user)
+ st.success("Hugging Face model evaluations are running in the background. You can navigate away or close the site.")
+ except Exception as e:
+ st.error(f"An error occurred: {str(e)}")
+ st.error(f"Detailed error: {traceback.format_exc()}")
  else:
+ st.error("Selected model not found.")
+ except Exception as e:
+ st.error(f"An error occurred: {str(e)}")
+ st.error(f"Detailed error: {traceback.format_exc()}")
+ st.success("Evaluations are running in the background. You can navigate away or close the site.")
+ elif input_type == "Audio":
+ if model_type.lower() == "simple" and test_data is None:
+ st.error("Please upload a valid test data JSON file.")
+ elif model_type.lower() != "simple" and (not context_dataset or not questions_json):
+ st.error("Please provide both context dataset and questions JSON.")
  else:
+ try:
+ selected_model = next(
+ (m for m in user_models if m['model_name'] == model_name),
+ None
+ )
+ if selected_model:
+ with st.spinner("Processing audio files..."):
+ prompt_text = transcribe_audio(prompt_audio)
+ context_text = transcribe_audio(context_audio)
+ response_text = transcribe_audio(response_audio)
+
+ test_data = [
+ {
+ "prompt": prompt_text,
+ "context": context_text,
+ "response": response_text
+ }
+ ]
+
+ with st.spinner("Starting evaluations..."):
+ evaluation_thread = threading.Thread(
+ target=run_custom_evaluations,
+ args=(test_data, selected_model, st.session_state.user)
+ )
+ evaluation_thread.start()
+ st.success("Evaluations are running in the background. You can navigate away or close the site.")
+ else:
+ st.error("Selected model not found.")
+ except Exception as e:
+ st.error(f"An error occurred: {e}")
+ elif input_type == "Image":
+ if not (prompt_image and context_image and response_image):
+ st.error("Please upload all three image files: Prompt, Context, and Response.")
+ else:
+ try:
+ selected_model = next(
+ (m for m in user_models if m['model_name'] == model_name),
+ None
+ )
+ if selected_model:
+ with st.spinner("Processing images and starting evaluations..."):
+ # Convert images to binary
+ prompt_bytes = prompt_image.read()
+ context_bytes = context_image.read()
+ response_bytes = response_image.read()
+
+ # Use runner.py to summarize the images
+ prompt_summary = summarize_image(prompt_bytes)
+ context_summary = summarize_image(context_bytes)
+ response_summary = summarize_image(response_bytes)
+
+ if prompt_summary and context_summary and response_summary:
+ # Prepare test data with summaries
+ test_data = [
+ {
+ "prompt": prompt_summary,
+ "context": context_summary,
+ "response": response_summary
+ }
+ ]
+
+ # Start the evaluation in a separate thread
+ evaluation_thread = threading.Thread(
+ target=run_custom_evaluations,
+ args=(test_data, selected_model, st.session_state.user)
+ )
+ evaluation_thread.start()
+ st.success("Images processed and evaluations are running in the background. You can navigate away or close the site.")
+ else:
+ st.error("Failed to generate summaries for the uploaded images.")
+ else:
+ st.error("Selected model not found.")
+ except Exception as e:
+ st.error(f"An error occurred: {e}")
+ elif input_type == "Image":
+ if not (prompt_image and context_image and response_image):
+ st.error("Please upload all three image files: Prompt, Context, and Response.")
+ else:
+ try:
+ selected_model = next(
+ (m for m in user_models if m['model_name'] == model_name),
+ None
+ )
+ if selected_model:
+ with st.spinner("Processing images and starting evaluations..."):
+ # Convert images to binary
+ prompt_bytes = prompt_image.read()
+ context_bytes = context_image.read()
+ response_bytes = response_image.read()
+
+ # Use runner.py to summarize the images
+ prompt_summary = summarize_image(prompt_bytes)
+ context_summary = summarize_image(context_bytes)
+ response_summary = summarize_image(response_bytes)
+
+ if prompt_summary and context_summary and response_summary:
+ # Prepare test data with summaries
+ test_data = [
+ {
+ "prompt": prompt_summary,
+ "context": context_summary,
+ "response": response_summary
+ }
+ ]
+
+ # Start the evaluation in a separate thread
+ evaluation_thread = threading.Thread(
+ target=run_custom_evaluations,
+ args=(test_data, selected_model, st.session_state.user)
+ )
+ evaluation_thread.start()
+ st.success("Images processed and evaluations are running in the background. You can navigate away or close the site.")
+ else:
+ st.error("Failed to generate summaries for the uploaded images.")
+ else:
+ st.error("Selected model not found.")
+ except Exception as e:
+ st.error(f"An error occurred: {e}")
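The Audio and Image branches above hand the evaluation to a plain thread so the Streamlit script run can return immediately. The pattern in isolation (caveat: st.* calls from a worker thread generally need the script-run context attached, so background work is safest when it only writes to the database, as run_custom_evaluations does via save_results):

    import threading

    def start_background_evaluation(fn, *args):
        # Fire-and-forget, matching the diff: results land in MongoDB,
        # and the History section picks them up on a later page load.
        thread = threading.Thread(target=fn, args=args)
        thread.start()
        return thread

    # e.g. start_background_evaluation(run_custom_evaluations,
    #                                  test_data, selected_model, username)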
+ elif st.session_state.app_mode == "Manage Models":
  st.title("Manage Your Models")
  # Fetch the user from the database
  user = users_collection.find_one({"username": st.session_state.user})

  )

  st.subheader("Add a New Model")
+ model_type = st.radio("Select Model Type:", ["Simple Model", "Custom Model","huggingface"])

  if model_type == "Simple Model":
  new_model_name = st.text_input("Enter New Model Name:")
+ if st.button("Add Simple Model"):
+ if new_model_name:
  model_id = f"{st.session_state.user}_model_{int(datetime.now().timestamp())}"
  model_data = {
  "model_id": model_id,
+ "model_name": new_model_name,
+ "model_type": "simple",
  "file_path": None,
  "model_link": None,
  "uploaded_at": datetime.now(),

  {"username": st.session_state.user},
  {"$push": {"models": model_data}}
  )
+ st.success(f"Model '{new_model_name}' added successfully as {model_id}!")
  else:
+ st.error("Please enter a valid model name.")

+ elif model_type == "Custom Model": # Custom Model
  custom_model_options = ["gpt-4o", "gpt-4o-mini"]
  selected_custom_model = st.selectbox("Select Custom Model:", custom_model_options)

  }}}
  )
  st.success(f"Custom Model '{selected_custom_model}' added successfully as {model_id}!")
+ else:
+ model_name = st.text_input("Enter Hugging Face Model Name:")
+ api_endpoint = st.text_input("Enter Hugging Face API Endpoint:")
+ api_token = st.text_input("Enter Hugging Face API Token:", type="password")
+
+ if st.button("Add Hugging Face Model"):
+ if api_endpoint and api_token:
+ model_id = f"{st.session_state.user}_model_{int(datetime.now().timestamp())}"
+ model_data = {
+ "model_id": model_id,
+ "model_name": model_name,
+ "model_type": "huggingface",
+ "file_path": None,
+ "model_link": api_endpoint,
+ "model_api_token": api_token,
+ "uploaded_at": datetime.now()
+ }
+ users_collection.update_one(
+ {"username": st.session_state.user},
+ {"$push": {"models": model_data}}
+ )
+ st.success(f"Hugging Face Model '{model_name}' added successfully as {model_id}!")

  st.markdown("---")

  {"$pull": {"models": {"model_id": model['model_id']}}}
  )
  st.success(f"Model {model['model_id']} deleted successfully!")
+ st.rerun()
  else:
  st.info("You have no uploaded models.")

+ elif st.session_state.app_mode == "History":
  st.title("History")
  st.write("### Your Evaluation History")

  st.info("You have no evaluation history yet.")

  except Exception as e:
+ st.error(f"Error fetching history data: {e}")