Ashmi Banerjee committed on
Commit
88f694a
·
1 Parent(s): 8c5eede

made it work for merged data

Browse files
app.py CHANGED
@@ -1,5 +1,3 @@
1
- import json
2
- from typing import Dict
3
  from utils.loaders import load_data
4
  from db.crud import read
5
  import streamlit as st
@@ -9,20 +7,11 @@ from views.intro_screen import welcome_screen
9
  from views.questions_screen import questions_screen, survey_completed
10
  from views.continue_survey import continue_survey_screen
11
  from css.layout import custom_css
 
 
12
 
13
  load_dotenv()
14
  VALIDATION_CODE = os.getenv("VALIDATION_CODE")
15
- if "VALIDATION_CODE" in os.environ:
16
- VALIDATION_CODE = os.getenv("VALIDATION_CODE")
17
- if "DATA_REPO" in os.environ:
18
- REPO_NAME = os.getenv("DATA_REPO")
19
- else:
20
- print("DATA_REPO not found in environment variables.")
21
- if "GEMINI_DATA_FILES" in os.environ:
22
- DATA_FILES = os.getenv("GEMINI_DATA_FILES")
23
- else:
24
- print("LLAMA_DATA_FILES not found in environment variables.")
25
-
26
 
27
 
28
  def initialization():
 
 
 
1
  from utils.loaders import load_data
2
  from db.crud import read
3
  import streamlit as st
 
7
  from views.questions_screen import questions_screen, survey_completed
8
  from views.continue_survey import continue_survey_screen
9
  from css.layout import custom_css
10
+ # st.set_page_config(layout="wide")
11
+
12
 
13
  load_dotenv()
14
  VALIDATION_CODE = os.getenv("VALIDATION_CODE")
 
 
 
 
 
 
 
 
 
 
 
15
 
16
 
17
  def initialization():
dummy_qa_data.csv DELETED
@@ -1,11 +0,0 @@
1
- q_id,Question,Generated Answer
2
- q_1,What is the capital of France?,The capital of France is Paris.
3
- q_2,Who wrote 'To Kill a Mockingbird'?,Harper Lee wrote 'To Kill a Mockingbird'.
4
- q_3,What is the largest planet in our solar system?,Jupiter is the largest planet in our solar system.
5
- q_4,Define photosynthesis.,Photosynthesis is the process by which green plants use sunlight to synthesize foods.
6
- q_5,Who painted the Mona Lisa?,Leonardo da Vinci painted the Mona Lisa.
7
- q_6,What is the speed of light?,"The speed of light is approximately 299,792 kilometers per second."
8
- q_7,Explain the theory of relativity.,"The theory of relativity, developed by Albert Einstein, explains the relationship between space and time."
9
- q_8,What is the chemical formula for water?,The chemical formula for water is H2O.
10
- q_9,Who discovered penicillin?,Alexander Fleming discovered penicillin.
11
- q_10,What is the square root of 64?,The square root of 64 is 8.
 
 
 
 
 
 
 
 
 
 
 
 
test.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+
4
# Placeholder query texts, keyed first by query variant and then by the model
# that produced the text. Swap this for the real data loader.
data = {
    "query_v": {
        "gemini": "Cheap European city break in February.",
        "llama": "Affordable European trip in February.",
    },
    "query_p0": {
        "gemini": "European city break in February, less crowded destinations.",
        "llama": "February European city break, away from the crowds.",
    },
    "query_p1": {
        "gemini": "Best European cities for intense physical training and recovery with easy access to ice rinks?",
        "llama": "Top European cities for intense training and recovery with ice rinks?",
    },
}

# Placeholder ratings, keyed first by model and then by query variant.
# "query_v" entries carry no "persona_alignment" rating.
rating_data = {
    "gemini": {
        "query_v": {"relevance": "Not Relevant", "clarity": "Not Clear"},
        "query_p0": {
            "relevance": "Not Relevant",
            "clarity": "Not Clear",
            "persona_alignment": "N/A",
        },
        "query_p1": {
            "relevance": "N/A",
            "clarity": "N/A",
            "persona_alignment": "N/A",
        },
    },
    "llama": {
        "query_v": {"relevance": "Somewhat Relevant", "clarity": "Somewhat Clear"},
        "query_p0": {
            "relevance": "Somewhat Relevant",
            "clarity": "Somewhat Clear",
            "persona_alignment": "Partially Aligned",
        },
        "query_p1": {
            "relevance": "Not Relevant",
            "clarity": "Not Clear",
            "persona_alignment": "Not Aligned",
        },
    },
}

# Outer keys of `data` become columns, inner keys become the row index, so a
# cell is addressed as df.loc[<model>, <query variant>].
df = pd.DataFrame(data)
36
+
37
+
38
# Rating scales shared by both model columns.
_RELEVANCE_OPTIONS = ['N/A', 'Not Relevant', 'Somewhat Relevant', 'Relevant', 'Unclear']
_CLARITY_OPTIONS = ['N/A', 'Not Clear', 'Somewhat Clear', 'Very Clear']
_PERSONA_OPTIONS = ['N/A', 'Not Aligned', 'Partially Aligned', 'Aligned', 'Unclear']


def _rating_radio(label, options, key, current):
    """Render a heading plus one horizontal radio group preselected to *current*."""
    st.markdown(label)
    # A stored rating that is not on the scale would make list.index() raise
    # ValueError; fall back to the first option so the page still renders.
    index = options.index(current) if current in options else 0
    st.radio(label, options=options, key=key, index=index, horizontal=True)


def _render_model_column(model_label, model_key, query_type, query_text,
                         relevance, clarity, persona_alignment=None):
    """Render one model's query text and its rating widgets in the active container."""
    st.markdown(model_label)
    st.write(query_text)
    _rating_radio("Relevance", _RELEVANCE_OPTIONS,
                  f"relevance_{query_type}_{model_key}", relevance)
    _rating_radio("Clarity", _CLARITY_OPTIONS,
                  f"clarity_{query_type}_{model_key}", clarity)
    # The persona widget is only drawn when a (truthy) rating is supplied.
    if persona_alignment:
        _rating_radio("Persona Alignment", _PERSONA_OPTIONS,
                      f"persona_{query_type}_{model_key}", persona_alignment)


def display_query_section(query_type, query_text_gemini, query_text_llama,
                          relevance_gemini, clarity_gemini,
                          relevance_llama, clarity_llama,
                          persona_alignment_gemini=None,
                          persona_alignment_llama=None):
    """Show one query variant side by side for Gemini and Llama with rating controls.

    Args:
        query_type: Heading for the section; also embedded in every widget key
            as ``<metric>_<query_type>_<model>``.
        query_text_gemini: Query text shown in the left (Gemini) column.
        query_text_llama: Query text shown in the right (Llama) column.
        relevance_gemini: Initially selected relevance rating for Gemini.
        clarity_gemini: Initially selected clarity rating for Gemini.
        relevance_llama: Initially selected relevance rating for Llama.
        clarity_llama: Initially selected clarity rating for Llama.
        persona_alignment_gemini: Initially selected persona-alignment rating
            for Gemini; the widget is omitted when this is falsy.
        persona_alignment_llama: Same as above, for Llama.

    Returns:
        None. The selections are not returned by this function.
    """
    st.subheader(f"{query_type}")
    gemini_col, llama_col = st.columns(2)
    with gemini_col:
        _render_model_column("Gemini", "gemini", query_type, query_text_gemini,
                             relevance_gemini, clarity_gemini,
                             persona_alignment_gemini)
    with llama_col:
        _render_model_column("Llama", "llama", query_type, query_text_llama,
                             relevance_llama, clarity_llama,
                             persona_alignment_llama)
69
+
70
# Main Streamlit App
# set_page_config is issued before any other Streamlit command in this script.
st.set_page_config(layout="wide")

# Context Information (hard-coded sample values for a single question).
st.title("Question 1 of 5")
st.subheader("Config ID: c_p_0_pop_low_easy")
st.markdown("### Context Information")
with st.expander("Persona", expanded=True):
    st.write("A top-scoring player in the local league who is also eyeing a professional career in the NHL")

# Kept as a real list rather than a backslash-continued string literal, so the
# rendered text does not depend on how the source lines happen to be indented.
cities = [
    'Adana', 'Adiyaman', 'Agri', 'Arad', 'Arkhangelsk', 'Bacau', 'Baia Mare',
    'Balikesir', 'Brest', 'Burgas', 'Canakkale', 'Craiova', 'Debrecen',
    'Denizli', 'Diyarbakir', 'Elazig', 'Erzincan', 'Eskisehir', 'Gaziantep',
    'Iasi', 'Ioannina', 'Isparta', 'Jonkoping', 'Kahramanmaras', 'Kars',
    'Kayseri', 'Konya', 'Kosice', 'Linkoping', 'Malatya', 'Miskolc',
    'Mykolaiv', 'Nalchik', 'Nevsehir', 'Nis', 'Orebro', 'Orleans', 'Rivne',
    'Rzeszow', 'Samsun', 'Sanliurfa', 'Sevilla', 'Siirt', 'Sivas',
    'Syktyvkar', 'Targu-Mures', 'Tekirdag', 'Thessaloniki', 'Trabzon',
    'Uzhhorod', 'Valladolid', 'Van', 'Vasteras', 'Vinnytsia',
    'Vitoria-Gasteiz', 'Vladikavkaz', 'Zaporizhzhia', 'Zielona Gora',
    'Batman', 'Erzurum',
]
with st.expander("Filters & Cities", expanded=True):
    st.write("Filters: {'popularity': 'low', 'month': 'February'}")
    st.write(f"Cities: {cities}")


# Display Query Sections
# One section per query variant. str.capitalize() turns "query_p0" into
# "Query_p0", which is the heading (and widget-key component) for the section.
for query_key in ("query_v", "query_p0", "query_p1"):
    gemini_rating = rating_data['gemini'][query_key]
    llama_rating = rating_data['llama'][query_key]
    display_query_section(
        query_type=query_key.capitalize(),
        query_text_gemini=df.loc['gemini', query_key],
        query_text_llama=df.loc['llama', query_key],
        relevance_gemini=gemini_rating['relevance'],
        clarity_gemini=gemini_rating['clarity'],
        relevance_llama=llama_rating['relevance'],
        clarity_llama=llama_rating['clarity'],
        # "query_v" has no persona rating; None leaves that widget out.
        persona_alignment_gemini=gemini_rating.get('persona_alignment'),
        persona_alignment_llama=llama_rating.get('persona_alignment'),
    )

# Additional Comments
# The caption is passed as the widget label instead of pairing a markdown line
# with an empty label, which Streamlit discourages for accessibility reasons.
st.text_area("Additional Comments (Optional):", key="additional_comments")

# Navigation Buttons (three equal-width columns; no handlers are attached here).
back_col, next_col, exit_col = st.columns([1, 1, 1])
with back_col:
    st.button("Back")
with next_col:
    st.button("Next")
with exit_col:
    st.button("Exit & Resume Later")
# Bottom message
st.markdown("Please provide a rating before proceeding.")
utils/loaders.py CHANGED
@@ -12,8 +12,7 @@ DATA_FILES = os.getenv("GEMINI_DATA_FILES")
12
 
13
  def load_data():
14
  try:
15
- #TODO: change this to load the data from the database (buggy for debugging)
16
- data = pd.read_csv("data/gemini_results_subset.csv")[:5]
17
  return data
18
  except Exception as e:
19
 
 
12
 
13
  def load_data():
14
  try:
15
+ data = pd.read_csv("data/user-evaluation/merged.csv")[:5]
 
16
  return data
17
  except Exception as e:
18
 
utils/notebooks/Data Merging.ipynb ADDED
@@ -0,0 +1,858 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "a16bb8a1",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 6,
16
+ "id": "4fc12e60",
17
+ "metadata": {},
18
+ "outputs": [
19
+ {
20
+ "name": "stdout",
21
+ "output_type": "stream",
22
+ "text": [
23
+ "(200, 9)\n"
24
+ ]
25
+ },
26
+ {
27
+ "data": {
28
+ "text/html": [
29
+ "<div>\n",
30
+ "<style scoped>\n",
31
+ " .dataframe tbody tr th:only-of-type {\n",
32
+ " vertical-align: middle;\n",
33
+ " }\n",
34
+ "\n",
35
+ " .dataframe tbody tr th {\n",
36
+ " vertical-align: top;\n",
37
+ " }\n",
38
+ "\n",
39
+ " .dataframe thead th {\n",
40
+ " text-align: right;\n",
41
+ " }\n",
42
+ "</style>\n",
43
+ "<table border=\"1\" class=\"dataframe\">\n",
44
+ " <thead>\n",
45
+ " <tr style=\"text-align: right;\">\n",
46
+ " <th></th>\n",
47
+ " <th>config_id</th>\n",
48
+ " <th>persona_id</th>\n",
49
+ " <th>persona</th>\n",
50
+ " <th>filters</th>\n",
51
+ " <th>context</th>\n",
52
+ " <th>city</th>\n",
53
+ " <th>llama_query_v</th>\n",
54
+ " <th>llama_query_p0</th>\n",
55
+ " <th>llama_query_p1</th>\n",
56
+ " </tr>\n",
57
+ " </thead>\n",
58
+ " <tbody>\n",
59
+ " <tr>\n",
60
+ " <th>0</th>\n",
61
+ " <td>c_p_0_pop_low_easy</td>\n",
62
+ " <td>p_0</td>\n",
63
+ " <td>A top-scoring player in the local league who i...</td>\n",
64
+ " <td>{'popularity': 'low', 'month': 'February'}</td>\n",
65
+ " <td>Adana has low popularity. Adana has low season...</td>\n",
66
+ " <td>['Adana', 'Adiyaman', 'Agri', 'Arad', 'Arkhang...</td>\n",
67
+ " <td>\"Less crowded European cities to visit in Febr...</td>\n",
68
+ " <td>\"European cities with ice hockey facilities, l...</td>\n",
69
+ " <td>Based on the user's profile as a top-scoring p...</td>\n",
70
+ " </tr>\n",
71
+ " <tr>\n",
72
+ " <th>1</th>\n",
73
+ " <td>c_p_1_pop_medium_medium</td>\n",
74
+ " <td>p_1</td>\n",
75
+ " <td>A former DJ at WSUM who is now working as a mu...</td>\n",
76
+ " <td>{'popularity': 'medium', 'budget': 'medium', '...</td>\n",
77
+ " <td>Coimbra has medium popularity and medium budge...</td>\n",
78
+ " <td>['Coimbra', 'Brno', 'Braga']</td>\n",
79
+ " <td>'medium budget European city breaks with parks...</td>\n",
80
+ " <td>\"Medium budget European cities with parks and ...</td>\n",
81
+ " <td>Based on the user's background as a former DJ ...</td>\n",
82
+ " </tr>\n",
83
+ " <tr>\n",
84
+ " <th>2</th>\n",
85
+ " <td>c_p_2_pop_high_hard</td>\n",
86
+ " <td>p_2</td>\n",
87
+ " <td>A fellow agent-turned-author who shares the sa...</td>\n",
88
+ " <td>{'popularity': 'high', 'budget': 'low', 'inter...</td>\n",
89
+ " <td>Zagreb has high popularity and low budget. Zag...</td>\n",
90
+ " <td>['Zagreb', 'Volgograd', 'Tirana', 'Tbilisi', '...</td>\n",
91
+ " <td>\"Looking for a popular and affordable European...</td>\n",
92
+ " <td>\"Low-budget European cities with museums and n...</td>\n",
93
+ " <td>Based on the provided information, I'm going t...</td>\n",
94
+ " </tr>\n",
95
+ " <tr>\n",
96
+ " <th>3</th>\n",
97
+ " <td>c_p_3_pop_low_sustainable</td>\n",
98
+ " <td>p_3</td>\n",
99
+ " <td>a film critic who dislikes storylines involvin...</td>\n",
100
+ " <td>{'popularity': 'low', 'interests': 'Outdoors &amp;...</td>\n",
101
+ " <td>Van has low popularity. Van has low season in ...</td>\n",
102
+ " <td>['Van', 'Uzhhorod', 'Trabzon', 'Thessaloniki',...</td>\n",
103
+ " <td>\"European cities with low popularity, monaster...</td>\n",
104
+ " <td>\"off the beaten path European city breaks in l...</td>\n",
105
+ " <td>Based on the given information, I'll create a ...</td>\n",
106
+ " </tr>\n",
107
+ " <tr>\n",
108
+ " <th>4</th>\n",
109
+ " <td>c_p_4_pop_medium_easy</td>\n",
110
+ " <td>p_4</td>\n",
111
+ " <td>A biology major conducting research on equine ...</td>\n",
112
+ " <td>{'popularity': 'medium', 'budget': 'high'}</td>\n",
113
+ " <td>Aalborg has medium popularity and high budget....</td>\n",
114
+ " <td>['Aalborg', 'Astrakhan', 'Bari', 'Bremen', 'Ch...</td>\n",
115
+ " <td>\"European cities for a luxurious trip.\"</td>\n",
116
+ " <td>\"European cities with horse riding trails and ...</td>\n",
117
+ " <td>Based on the user's background and interests, ...</td>\n",
118
+ " </tr>\n",
119
+ " </tbody>\n",
120
+ "</table>\n",
121
+ "</div>"
122
+ ],
123
+ "text/plain": [
124
+ " config_id persona_id \\\n",
125
+ "0 c_p_0_pop_low_easy p_0 \n",
126
+ "1 c_p_1_pop_medium_medium p_1 \n",
127
+ "2 c_p_2_pop_high_hard p_2 \n",
128
+ "3 c_p_3_pop_low_sustainable p_3 \n",
129
+ "4 c_p_4_pop_medium_easy p_4 \n",
130
+ "\n",
131
+ " persona \\\n",
132
+ "0 A top-scoring player in the local league who i... \n",
133
+ "1 A former DJ at WSUM who is now working as a mu... \n",
134
+ "2 A fellow agent-turned-author who shares the sa... \n",
135
+ "3 a film critic who dislikes storylines involvin... \n",
136
+ "4 A biology major conducting research on equine ... \n",
137
+ "\n",
138
+ " filters \\\n",
139
+ "0 {'popularity': 'low', 'month': 'February'} \n",
140
+ "1 {'popularity': 'medium', 'budget': 'medium', '... \n",
141
+ "2 {'popularity': 'high', 'budget': 'low', 'inter... \n",
142
+ "3 {'popularity': 'low', 'interests': 'Outdoors &... \n",
143
+ "4 {'popularity': 'medium', 'budget': 'high'} \n",
144
+ "\n",
145
+ " context \\\n",
146
+ "0 Adana has low popularity. Adana has low season... \n",
147
+ "1 Coimbra has medium popularity and medium budge... \n",
148
+ "2 Zagreb has high popularity and low budget. Zag... \n",
149
+ "3 Van has low popularity. Van has low season in ... \n",
150
+ "4 Aalborg has medium popularity and high budget.... \n",
151
+ "\n",
152
+ " city \\\n",
153
+ "0 ['Adana', 'Adiyaman', 'Agri', 'Arad', 'Arkhang... \n",
154
+ "1 ['Coimbra', 'Brno', 'Braga'] \n",
155
+ "2 ['Zagreb', 'Volgograd', 'Tirana', 'Tbilisi', '... \n",
156
+ "3 ['Van', 'Uzhhorod', 'Trabzon', 'Thessaloniki',... \n",
157
+ "4 ['Aalborg', 'Astrakhan', 'Bari', 'Bremen', 'Ch... \n",
158
+ "\n",
159
+ " llama_query_v \\\n",
160
+ "0 \"Less crowded European cities to visit in Febr... \n",
161
+ "1 'medium budget European city breaks with parks... \n",
162
+ "2 \"Looking for a popular and affordable European... \n",
163
+ "3 \"European cities with low popularity, monaster... \n",
164
+ "4 \"European cities for a luxurious trip.\" \n",
165
+ "\n",
166
+ " llama_query_p0 \\\n",
167
+ "0 \"European cities with ice hockey facilities, l... \n",
168
+ "1 \"Medium budget European cities with parks and ... \n",
169
+ "2 \"Low-budget European cities with museums and n... \n",
170
+ "3 \"off the beaten path European city breaks in l... \n",
171
+ "4 \"European cities with horse riding trails and ... \n",
172
+ "\n",
173
+ " llama_query_p1 \n",
174
+ "0 Based on the user's profile as a top-scoring p... \n",
175
+ "1 Based on the user's background as a former DJ ... \n",
176
+ "2 Based on the provided information, I'm going t... \n",
177
+ "3 Based on the given information, I'll create a ... \n",
178
+ "4 Based on the user's background and interests, ... "
179
+ ]
180
+ },
181
+ "execution_count": 6,
182
+ "metadata": {},
183
+ "output_type": "execute_result"
184
+ }
185
+ ],
186
+ "source": [
187
+ "llama = pd.read_csv(\"../../data/llama_results_subset.csv\")\n",
188
+ "# llama[\"model\"] = \"llama-3.2-90b\"\n",
189
+ "\n",
190
+ "llama.rename(columns={'query_v': \"llama_query_v\", 'query_p0': \"llama_query_p0\", \"query_p1\": \"llama_query_p1\"}, inplace = True)\n",
191
+ "\n",
192
+ "print(llama.shape)\n",
193
+ "llama.head()"
194
+ ]
195
+ },
196
+ {
197
+ "cell_type": "code",
198
+ "execution_count": 7,
199
+ "id": "a651f27b",
200
+ "metadata": {},
201
+ "outputs": [
202
+ {
203
+ "name": "stdout",
204
+ "output_type": "stream",
205
+ "text": [
206
+ "(200, 9)\n"
207
+ ]
208
+ },
209
+ {
210
+ "data": {
211
+ "text/html": [
212
+ "<div>\n",
213
+ "<style scoped>\n",
214
+ " .dataframe tbody tr th:only-of-type {\n",
215
+ " vertical-align: middle;\n",
216
+ " }\n",
217
+ "\n",
218
+ " .dataframe tbody tr th {\n",
219
+ " vertical-align: top;\n",
220
+ " }\n",
221
+ "\n",
222
+ " .dataframe thead th {\n",
223
+ " text-align: right;\n",
224
+ " }\n",
225
+ "</style>\n",
226
+ "<table border=\"1\" class=\"dataframe\">\n",
227
+ " <thead>\n",
228
+ " <tr style=\"text-align: right;\">\n",
229
+ " <th></th>\n",
230
+ " <th>config_id</th>\n",
231
+ " <th>persona_id</th>\n",
232
+ " <th>persona</th>\n",
233
+ " <th>filters</th>\n",
234
+ " <th>context</th>\n",
235
+ " <th>city</th>\n",
236
+ " <th>gemini_query_v</th>\n",
237
+ " <th>gemini_query_p0</th>\n",
238
+ " <th>gemini_query_p1</th>\n",
239
+ " </tr>\n",
240
+ " </thead>\n",
241
+ " <tbody>\n",
242
+ " <tr>\n",
243
+ " <th>0</th>\n",
244
+ " <td>c_p_0_pop_low_easy</td>\n",
245
+ " <td>p_0</td>\n",
246
+ " <td>A top-scoring player in the local league who i...</td>\n",
247
+ " <td>{'popularity': 'low', 'month': 'February'}</td>\n",
248
+ " <td>Adana has low popularity. Adana has low season...</td>\n",
249
+ " <td>['Adana', 'Adiyaman', 'Agri', 'Arad', 'Arkhang...</td>\n",
250
+ " <td>Cheap European city break in February.\\n</td>\n",
251
+ " <td>European city break in February, less crowded ...</td>\n",
252
+ " <td>Best European cities for intense physical trai...</td>\n",
253
+ " </tr>\n",
254
+ " <tr>\n",
255
+ " <th>1</th>\n",
256
+ " <td>c_p_1_pop_medium_medium</td>\n",
257
+ " <td>p_1</td>\n",
258
+ " <td>A former DJ at WSUM who is now working as a mu...</td>\n",
259
+ " <td>{'popularity': 'medium', 'budget': 'medium', '...</td>\n",
260
+ " <td>Coimbra has medium popularity and medium budge...</td>\n",
261
+ " <td>['Coimbra', 'Brno', 'Braga']</td>\n",
262
+ " <td>Medium budget European city break with parks a...</td>\n",
263
+ " <td>European city break, medium budget, good parks...</td>\n",
264
+ " <td>Best European cities for live music, especiall...</td>\n",
265
+ " </tr>\n",
266
+ " <tr>\n",
267
+ " <th>2</th>\n",
268
+ " <td>c_p_2_pop_high_hard</td>\n",
269
+ " <td>p_2</td>\n",
270
+ " <td>A fellow agent-turned-author who shares the sa...</td>\n",
271
+ " <td>{'popularity': 'high', 'budget': 'low', 'inter...</td>\n",
272
+ " <td>Zagreb has high popularity and low budget. Zag...</td>\n",
273
+ " <td>['Zagreb', 'Volgograd', 'Tirana', 'Tbilisi', '...</td>\n",
274
+ " <td>Cheap January city break in Europe with museum...</td>\n",
275
+ " <td>European city break in January. Budget friendl...</td>\n",
276
+ " <td>Where can I find inspiring European cities wit...</td>\n",
277
+ " </tr>\n",
278
+ " <tr>\n",
279
+ " <th>3</th>\n",
280
+ " <td>c_p_3_pop_low_sustainable</td>\n",
281
+ " <td>p_3</td>\n",
282
+ " <td>a film critic who dislikes storylines involvin...</td>\n",
283
+ " <td>{'popularity': 'low', 'interests': 'Outdoors &amp;...</td>\n",
284
+ " <td>Van has low popularity. Van has low season in ...</td>\n",
285
+ " <td>['Van', 'Uzhhorod', 'Trabzon', 'Thessaloniki',...</td>\n",
286
+ " <td>Cheap European city break in January with inte...</td>\n",
287
+ " <td>European city break in January. Low-budget des...</td>\n",
288
+ " <td>Best European cities for unique, artistic expe...</td>\n",
289
+ " </tr>\n",
290
+ " <tr>\n",
291
+ " <th>4</th>\n",
292
+ " <td>c_p_4_pop_medium_easy</td>\n",
293
+ " <td>p_4</td>\n",
294
+ " <td>A biology major conducting research on equine ...</td>\n",
295
+ " <td>{'popularity': 'medium', 'budget': 'high'}</td>\n",
296
+ " <td>Aalborg has medium popularity and high budget....</td>\n",
297
+ " <td>['Aalborg', 'Astrakhan', 'Bari', 'Bremen', 'Ch...</td>\n",
298
+ " <td>Suggest some moderately popular European citie...</td>\n",
299
+ " <td>High-budget European city with equestrian attr...</td>\n",
300
+ " <td>European cities with renowned veterinary or ag...</td>\n",
301
+ " </tr>\n",
302
+ " </tbody>\n",
303
+ "</table>\n",
304
+ "</div>"
305
+ ],
306
+ "text/plain": [
307
+ " config_id persona_id \\\n",
308
+ "0 c_p_0_pop_low_easy p_0 \n",
309
+ "1 c_p_1_pop_medium_medium p_1 \n",
310
+ "2 c_p_2_pop_high_hard p_2 \n",
311
+ "3 c_p_3_pop_low_sustainable p_3 \n",
312
+ "4 c_p_4_pop_medium_easy p_4 \n",
313
+ "\n",
314
+ " persona \\\n",
315
+ "0 A top-scoring player in the local league who i... \n",
316
+ "1 A former DJ at WSUM who is now working as a mu... \n",
317
+ "2 A fellow agent-turned-author who shares the sa... \n",
318
+ "3 a film critic who dislikes storylines involvin... \n",
319
+ "4 A biology major conducting research on equine ... \n",
320
+ "\n",
321
+ " filters \\\n",
322
+ "0 {'popularity': 'low', 'month': 'February'} \n",
323
+ "1 {'popularity': 'medium', 'budget': 'medium', '... \n",
324
+ "2 {'popularity': 'high', 'budget': 'low', 'inter... \n",
325
+ "3 {'popularity': 'low', 'interests': 'Outdoors &... \n",
326
+ "4 {'popularity': 'medium', 'budget': 'high'} \n",
327
+ "\n",
328
+ " context \\\n",
329
+ "0 Adana has low popularity. Adana has low season... \n",
330
+ "1 Coimbra has medium popularity and medium budge... \n",
331
+ "2 Zagreb has high popularity and low budget. Zag... \n",
332
+ "3 Van has low popularity. Van has low season in ... \n",
333
+ "4 Aalborg has medium popularity and high budget.... \n",
334
+ "\n",
335
+ " city \\\n",
336
+ "0 ['Adana', 'Adiyaman', 'Agri', 'Arad', 'Arkhang... \n",
337
+ "1 ['Coimbra', 'Brno', 'Braga'] \n",
338
+ "2 ['Zagreb', 'Volgograd', 'Tirana', 'Tbilisi', '... \n",
339
+ "3 ['Van', 'Uzhhorod', 'Trabzon', 'Thessaloniki',... \n",
340
+ "4 ['Aalborg', 'Astrakhan', 'Bari', 'Bremen', 'Ch... \n",
341
+ "\n",
342
+ " gemini_query_v \\\n",
343
+ "0 Cheap European city break in February.\\n \n",
344
+ "1 Medium budget European city break with parks a... \n",
345
+ "2 Cheap January city break in Europe with museum... \n",
346
+ "3 Cheap European city break in January with inte... \n",
347
+ "4 Suggest some moderately popular European citie... \n",
348
+ "\n",
349
+ " gemini_query_p0 \\\n",
350
+ "0 European city break in February, less crowded ... \n",
351
+ "1 European city break, medium budget, good parks... \n",
352
+ "2 European city break in January. Budget friendl... \n",
353
+ "3 European city break in January. Low-budget des... \n",
354
+ "4 High-budget European city with equestrian attr... \n",
355
+ "\n",
356
+ " gemini_query_p1 \n",
357
+ "0 Best European cities for intense physical trai... \n",
358
+ "1 Best European cities for live music, especiall... \n",
359
+ "2 Where can I find inspiring European cities wit... \n",
360
+ "3 Best European cities for unique, artistic expe... \n",
361
+ "4 European cities with renowned veterinary or ag... "
362
+ ]
363
+ },
364
+ "execution_count": 7,
365
+ "metadata": {},
366
+ "output_type": "execute_result"
367
+ }
368
+ ],
369
+ "source": [
370
+ "gemini = pd.read_csv(\"../../data/gemini_results_subset.csv\")\n",
371
+ "# gemini[\"model\"] = \"gemini-1.5-pro-002\"\n",
372
+ "gemini.rename(columns={'query_v': \"gemini_query_v\", 'query_p0': \"gemini_query_p0\", \"query_p1\": \"gemini_query_p1\"}, inplace = True)\n",
373
+ "\n",
374
+ "print(gemini.shape)\n",
375
+ "gemini.head()"
376
+ ]
377
+ },
378
+ {
379
+ "cell_type": "code",
380
+ "execution_count": 8,
381
+ "id": "dfd8102c",
382
+ "metadata": {},
383
+ "outputs": [
384
+ {
385
+ "name": "stdout",
386
+ "output_type": "stream",
387
+ "text": [
388
+ "6\n"
389
+ ]
390
+ },
391
+ {
392
+ "data": {
393
+ "text/plain": [
394
+ "['filters', 'city', 'context', 'config_id', 'persona', 'persona_id']"
395
+ ]
396
+ },
397
+ "execution_count": 8,
398
+ "metadata": {},
399
+ "output_type": "execute_result"
400
+ }
401
+ ],
402
+ "source": [
403
+ "common_cols = list(set(list(gemini)).intersection(set(list(llama))))\n",
404
+ "print(len(common_cols))\n",
405
+ "common_cols"
406
+ ]
407
+ },
408
+ {
409
+ "cell_type": "code",
410
+ "execution_count": 9,
411
+ "id": "32344df6",
412
+ "metadata": {},
413
+ "outputs": [
414
+ {
415
+ "name": "stdout",
416
+ "output_type": "stream",
417
+ "text": [
418
+ "(200, 12)\n"
419
+ ]
420
+ },
421
+ {
422
+ "data": {
423
+ "text/html": [
424
+ "<div>\n",
425
+ "<style scoped>\n",
426
+ " .dataframe tbody tr th:only-of-type {\n",
427
+ " vertical-align: middle;\n",
428
+ " }\n",
429
+ "\n",
430
+ " .dataframe tbody tr th {\n",
431
+ " vertical-align: top;\n",
432
+ " }\n",
433
+ "\n",
434
+ " .dataframe thead th {\n",
435
+ " text-align: right;\n",
436
+ " }\n",
437
+ "</style>\n",
438
+ "<table border=\"1\" class=\"dataframe\">\n",
439
+ " <thead>\n",
440
+ " <tr style=\"text-align: right;\">\n",
441
+ " <th></th>\n",
442
+ " <th>config_id</th>\n",
443
+ " <th>persona_id</th>\n",
444
+ " <th>persona</th>\n",
445
+ " <th>filters</th>\n",
446
+ " <th>context</th>\n",
447
+ " <th>city</th>\n",
448
+ " <th>gemini_query_v</th>\n",
449
+ " <th>gemini_query_p0</th>\n",
450
+ " <th>gemini_query_p1</th>\n",
451
+ " <th>llama_query_v</th>\n",
452
+ " <th>llama_query_p0</th>\n",
453
+ " <th>llama_query_p1</th>\n",
454
+ " </tr>\n",
455
+ " </thead>\n",
456
+ " <tbody>\n",
457
+ " <tr>\n",
458
+ " <th>0</th>\n",
459
+ " <td>c_p_0_pop_low_easy</td>\n",
460
+ " <td>p_0</td>\n",
461
+ " <td>A top-scoring player in the local league who i...</td>\n",
462
+ " <td>{'popularity': 'low', 'month': 'February'}</td>\n",
463
+ " <td>Adana has low popularity. Adana has low season...</td>\n",
464
+ " <td>['Adana', 'Adiyaman', 'Agri', 'Arad', 'Arkhang...</td>\n",
465
+ " <td>Cheap European city break in February.\\n</td>\n",
466
+ " <td>European city break in February, less crowded ...</td>\n",
467
+ " <td>Best European cities for intense physical trai...</td>\n",
468
+ " <td>\"Less crowded European cities to visit in Febr...</td>\n",
469
+ " <td>\"European cities with ice hockey facilities, l...</td>\n",
470
+ " <td>Based on the user's profile as a top-scoring p...</td>\n",
471
+ " </tr>\n",
472
+ " <tr>\n",
473
+ " <th>1</th>\n",
474
+ " <td>c_p_1_pop_medium_medium</td>\n",
475
+ " <td>p_1</td>\n",
476
+ " <td>A former DJ at WSUM who is now working as a mu...</td>\n",
477
+ " <td>{'popularity': 'medium', 'budget': 'medium', '...</td>\n",
478
+ " <td>Coimbra has medium popularity and medium budge...</td>\n",
479
+ " <td>['Coimbra', 'Brno', 'Braga']</td>\n",
480
+ " <td>Medium budget European city break with parks a...</td>\n",
481
+ " <td>European city break, medium budget, good parks...</td>\n",
482
+ " <td>Best European cities for live music, especiall...</td>\n",
483
+ " <td>'medium budget European city breaks with parks...</td>\n",
484
+ " <td>\"Medium budget European cities with parks and ...</td>\n",
485
+ " <td>Based on the user's background as a former DJ ...</td>\n",
486
+ " </tr>\n",
487
+ " <tr>\n",
488
+ " <th>2</th>\n",
489
+ " <td>c_p_2_pop_high_hard</td>\n",
490
+ " <td>p_2</td>\n",
491
+ " <td>A fellow agent-turned-author who shares the sa...</td>\n",
492
+ " <td>{'popularity': 'high', 'budget': 'low', 'inter...</td>\n",
493
+ " <td>Zagreb has high popularity and low budget. Zag...</td>\n",
494
+ " <td>['Zagreb', 'Volgograd', 'Tirana', 'Tbilisi', '...</td>\n",
495
+ " <td>Cheap January city break in Europe with museum...</td>\n",
496
+ " <td>European city break in January. Budget friendl...</td>\n",
497
+ " <td>Where can I find inspiring European cities wit...</td>\n",
498
+ " <td>\"Looking for a popular and affordable European...</td>\n",
499
+ " <td>\"Low-budget European cities with museums and n...</td>\n",
500
+ " <td>Based on the provided information, I'm going t...</td>\n",
501
+ " </tr>\n",
502
+ " <tr>\n",
503
+ " <th>3</th>\n",
504
+ " <td>c_p_3_pop_low_sustainable</td>\n",
505
+ " <td>p_3</td>\n",
506
+ " <td>a film critic who dislikes storylines involvin...</td>\n",
507
+ " <td>{'popularity': 'low', 'interests': 'Outdoors &amp;...</td>\n",
508
+ " <td>Van has low popularity. Van has low season in ...</td>\n",
509
+ " <td>['Van', 'Uzhhorod', 'Trabzon', 'Thessaloniki',...</td>\n",
510
+ " <td>Cheap European city break in January with inte...</td>\n",
511
+ " <td>European city break in January. Low-budget des...</td>\n",
512
+ " <td>Best European cities for unique, artistic expe...</td>\n",
513
+ " <td>\"European cities with low popularity, monaster...</td>\n",
514
+ " <td>\"off the beaten path European city breaks in l...</td>\n",
515
+ " <td>Based on the given information, I'll create a ...</td>\n",
516
+ " </tr>\n",
517
+ " <tr>\n",
518
+ " <th>4</th>\n",
519
+ " <td>c_p_4_pop_medium_easy</td>\n",
520
+ " <td>p_4</td>\n",
521
+ " <td>A biology major conducting research on equine ...</td>\n",
522
+ " <td>{'popularity': 'medium', 'budget': 'high'}</td>\n",
523
+ " <td>Aalborg has medium popularity and high budget....</td>\n",
524
+ " <td>['Aalborg', 'Astrakhan', 'Bari', 'Bremen', 'Ch...</td>\n",
525
+ " <td>Suggest some moderately popular European citie...</td>\n",
526
+ " <td>High-budget European city with equestrian attr...</td>\n",
527
+ " <td>European cities with renowned veterinary or ag...</td>\n",
528
+ " <td>\"European cities for a luxurious trip.\"</td>\n",
529
+ " <td>\"European cities with horse riding trails and ...</td>\n",
530
+ " <td>Based on the user's background and interests, ...</td>\n",
531
+ " </tr>\n",
532
+ " <tr>\n",
533
+ " <th>...</th>\n",
534
+ " <td>...</td>\n",
535
+ " <td>...</td>\n",
536
+ " <td>...</td>\n",
537
+ " <td>...</td>\n",
538
+ " <td>...</td>\n",
539
+ " <td>...</td>\n",
540
+ " <td>...</td>\n",
541
+ " <td>...</td>\n",
542
+ " <td>...</td>\n",
543
+ " <td>...</td>\n",
544
+ " <td>...</td>\n",
545
+ " <td>...</td>\n",
546
+ " </tr>\n",
547
+ " <tr>\n",
548
+ " <th>195</th>\n",
549
+ " <td>c_p_8_pop_medium_easy</td>\n",
550
+ " <td>p_8</td>\n",
551
+ " <td>A junior Flash developer seeking guidance on c...</td>\n",
552
+ " <td>{'popularity': 'medium', 'interests': 'Outdoor...</td>\n",
553
+ " <td>Zaragoza has medium popularity. In Zaragoza yo...</td>\n",
554
+ " <td>['Zaragoza', 'Varna', 'Turku', 'Tampere', 'Szc...</td>\n",
555
+ " <td>Good places to swim outdoors in Europe\\n</td>\n",
556
+ " <td>Good places for a Flash developer to visit in ...</td>\n",
557
+ " <td>Best European cities for tech meetups &amp; cowork...</td>\n",
558
+ " <td>\"European cities with medium popularity for ou...</td>\n",
559
+ " <td>A junior Flash developer seeking guidance on c...</td>\n",
560
+ " <td>Which European cities offer a mix of cultural ...</td>\n",
561
+ " </tr>\n",
562
+ " <tr>\n",
563
+ " <th>196</th>\n",
564
+ " <td>c_p_9_pop_high_medium</td>\n",
565
+ " <td>p_9</td>\n",
566
+ " <td>An avid gamer who has played Sins of a Solar E...</td>\n",
567
+ " <td>{'popularity': 'high', 'month': 'April', 'budg...</td>\n",
568
+ " <td>Sarajevo has high popularity and medium budget...</td>\n",
569
+ " <td>['Sarajevo', 'Tallinn', 'Vilnius', 'Belgrade',...</td>\n",
570
+ " <td>Medium budget European city break in April wit...</td>\n",
571
+ " <td>Budget-friendly European city break in April w...</td>\n",
572
+ " <td>European cities with a grand, futuristic feel ...</td>\n",
573
+ " <td>'medium budget trip in April to a popular Euro...</td>\n",
574
+ " <td>\"Medium budget trip to a popular European city...</td>\n",
575
+ " <td>Based on the user's interest in Sins of a Sola...</td>\n",
576
+ " </tr>\n",
577
+ " <tr>\n",
578
+ " <th>197</th>\n",
579
+ " <td>c_p_10_pop_low_hard</td>\n",
580
+ " <td>p_10</td>\n",
581
+ " <td>An atheist, philosophy lecturer who encourages...</td>\n",
582
+ " <td>{'popularity': 'low', 'interests': 'Outdoors &amp;...</td>\n",
583
+ " <td>Malatya has low popularity and high budget. Ma...</td>\n",
584
+ " <td>['Malatya', 'Ioannina']</td>\n",
585
+ " <td>High-budget European city trip in February wit...</td>\n",
586
+ " <td>European city break in February. Low season &amp; ...</td>\n",
587
+ " <td>Where can I find European cities rich in histo...</td>\n",
588
+ " <td>\"Less crowded European destinations with water...</td>\n",
589
+ " <td>\"Less crowded European destinations for a pric...</td>\n",
590
+ " <td>Based on the provided information, I would gue...</td>\n",
591
+ " </tr>\n",
592
+ " <tr>\n",
593
+ " <th>198</th>\n",
594
+ " <td>c_p_11_pop_medium_sustainable</td>\n",
595
+ " <td>p_11</td>\n",
596
+ " <td>A young apprentice fascinated by the technolog...</td>\n",
597
+ " <td>{'popularity': 'medium', 'budget': 'low', 'mon...</td>\n",
598
+ " <td>Chelyabinsk has medium popularity , low budget...</td>\n",
599
+ " <td>['Chelyabinsk', 'Kirov', 'Podgorica', 'Rijeka'...</td>\n",
600
+ " <td>Suggest a low-budget, walkable European city b...</td>\n",
601
+ " <td>Budget-friendly European city break in April, ...</td>\n",
602
+ " <td>Which European cities offer glimpses into the ...</td>\n",
603
+ " <td>\"European cities with great walkability and lo...</td>\n",
604
+ " <td>\"European cities with industrial heritage site...</td>\n",
605
+ " <td>Based on the user's interest in technological ...</td>\n",
606
+ " </tr>\n",
607
+ " <tr>\n",
608
+ " <th>199</th>\n",
609
+ " <td>c_p_12_pop_high_easy</td>\n",
610
+ " <td>p_12</td>\n",
611
+ " <td>A high school guidance counselor in Winnebago ...</td>\n",
612
+ " <td>{'popularity': 'high', 'budget': 'medium'}</td>\n",
613
+ " <td>Belgrade has high popularity and medium budget...</td>\n",
614
+ " <td>['Belgrade', 'Bratislava', 'Budapest', 'Nicosi...</td>\n",
615
+ " <td>Popular and affordable European city break des...</td>\n",
616
+ " <td>Affordable, popular European capitals for a we...</td>\n",
617
+ " <td>Affordable, safe European cities with historic...</td>\n",
618
+ " <td>'medium budget european city breaks in popular...</td>\n",
619
+ " <td>\"Medium budget city breaks in Europe for a sch...</td>\n",
620
+ " <td>Based on the information provided, I'll make a...</td>\n",
621
+ " </tr>\n",
622
+ " </tbody>\n",
623
+ "</table>\n",
624
+ "<p>200 rows × 12 columns</p>\n",
625
+ "</div>"
626
+ ],
627
+ "text/plain": [
628
+ " config_id persona_id \\\n",
629
+ "0 c_p_0_pop_low_easy p_0 \n",
630
+ "1 c_p_1_pop_medium_medium p_1 \n",
631
+ "2 c_p_2_pop_high_hard p_2 \n",
632
+ "3 c_p_3_pop_low_sustainable p_3 \n",
633
+ "4 c_p_4_pop_medium_easy p_4 \n",
634
+ ".. ... ... \n",
635
+ "195 c_p_8_pop_medium_easy p_8 \n",
636
+ "196 c_p_9_pop_high_medium p_9 \n",
637
+ "197 c_p_10_pop_low_hard p_10 \n",
638
+ "198 c_p_11_pop_medium_sustainable p_11 \n",
639
+ "199 c_p_12_pop_high_easy p_12 \n",
640
+ "\n",
641
+ " persona \\\n",
642
+ "0 A top-scoring player in the local league who i... \n",
643
+ "1 A former DJ at WSUM who is now working as a mu... \n",
644
+ "2 A fellow agent-turned-author who shares the sa... \n",
645
+ "3 a film critic who dislikes storylines involvin... \n",
646
+ "4 A biology major conducting research on equine ... \n",
647
+ ".. ... \n",
648
+ "195 A junior Flash developer seeking guidance on c... \n",
649
+ "196 An avid gamer who has played Sins of a Solar E... \n",
650
+ "197 An atheist, philosophy lecturer who encourages... \n",
651
+ "198 A young apprentice fascinated by the technolog... \n",
652
+ "199 A high school guidance counselor in Winnebago ... \n",
653
+ "\n",
654
+ " filters \\\n",
655
+ "0 {'popularity': 'low', 'month': 'February'} \n",
656
+ "1 {'popularity': 'medium', 'budget': 'medium', '... \n",
657
+ "2 {'popularity': 'high', 'budget': 'low', 'inter... \n",
658
+ "3 {'popularity': 'low', 'interests': 'Outdoors &... \n",
659
+ "4 {'popularity': 'medium', 'budget': 'high'} \n",
660
+ ".. ... \n",
661
+ "195 {'popularity': 'medium', 'interests': 'Outdoor... \n",
662
+ "196 {'popularity': 'high', 'month': 'April', 'budg... \n",
663
+ "197 {'popularity': 'low', 'interests': 'Outdoors &... \n",
664
+ "198 {'popularity': 'medium', 'budget': 'low', 'mon... \n",
665
+ "199 {'popularity': 'high', 'budget': 'medium'} \n",
666
+ "\n",
667
+ " context \\\n",
668
+ "0 Adana has low popularity. Adana has low season... \n",
669
+ "1 Coimbra has medium popularity and medium budge... \n",
670
+ "2 Zagreb has high popularity and low budget. Zag... \n",
671
+ "3 Van has low popularity. Van has low season in ... \n",
672
+ "4 Aalborg has medium popularity and high budget.... \n",
673
+ ".. ... \n",
674
+ "195 Zaragoza has medium popularity. In Zaragoza yo... \n",
675
+ "196 Sarajevo has high popularity and medium budget... \n",
676
+ "197 Malatya has low popularity and high budget. Ma... \n",
677
+ "198 Chelyabinsk has medium popularity , low budget... \n",
678
+ "199 Belgrade has high popularity and medium budget... \n",
679
+ "\n",
680
+ " city \\\n",
681
+ "0 ['Adana', 'Adiyaman', 'Agri', 'Arad', 'Arkhang... \n",
682
+ "1 ['Coimbra', 'Brno', 'Braga'] \n",
683
+ "2 ['Zagreb', 'Volgograd', 'Tirana', 'Tbilisi', '... \n",
684
+ "3 ['Van', 'Uzhhorod', 'Trabzon', 'Thessaloniki',... \n",
685
+ "4 ['Aalborg', 'Astrakhan', 'Bari', 'Bremen', 'Ch... \n",
686
+ ".. ... \n",
687
+ "195 ['Zaragoza', 'Varna', 'Turku', 'Tampere', 'Szc... \n",
688
+ "196 ['Sarajevo', 'Tallinn', 'Vilnius', 'Belgrade',... \n",
689
+ "197 ['Malatya', 'Ioannina'] \n",
690
+ "198 ['Chelyabinsk', 'Kirov', 'Podgorica', 'Rijeka'... \n",
691
+ "199 ['Belgrade', 'Bratislava', 'Budapest', 'Nicosi... \n",
692
+ "\n",
693
+ " gemini_query_v \\\n",
694
+ "0 Cheap European city break in February.\\n \n",
695
+ "1 Medium budget European city break with parks a... \n",
696
+ "2 Cheap January city break in Europe with museum... \n",
697
+ "3 Cheap European city break in January with inte... \n",
698
+ "4 Suggest some moderately popular European citie... \n",
699
+ ".. ... \n",
700
+ "195 Good places to swim outdoors in Europe\\n \n",
701
+ "196 Medium budget European city break in April wit... \n",
702
+ "197 High-budget European city trip in February wit... \n",
703
+ "198 Suggest a low-budget, walkable European city b... \n",
704
+ "199 Popular and affordable European city break des... \n",
705
+ "\n",
706
+ " gemini_query_p0 \\\n",
707
+ "0 European city break in February, less crowded ... \n",
708
+ "1 European city break, medium budget, good parks... \n",
709
+ "2 European city break in January. Budget friendl... \n",
710
+ "3 European city break in January. Low-budget des... \n",
711
+ "4 High-budget European city with equestrian attr... \n",
712
+ ".. ... \n",
713
+ "195 Good places for a Flash developer to visit in ... \n",
714
+ "196 Budget-friendly European city break in April w... \n",
715
+ "197 European city break in February. Low season & ... \n",
716
+ "198 Budget-friendly European city break in April, ... \n",
717
+ "199 Affordable, popular European capitals for a we... \n",
718
+ "\n",
719
+ " gemini_query_p1 \\\n",
720
+ "0 Best European cities for intense physical trai... \n",
721
+ "1 Best European cities for live music, especiall... \n",
722
+ "2 Where can I find inspiring European cities wit... \n",
723
+ "3 Best European cities for unique, artistic expe... \n",
724
+ "4 European cities with renowned veterinary or ag... \n",
725
+ ".. ... \n",
726
+ "195 Best European cities for tech meetups & cowork... \n",
727
+ "196 European cities with a grand, futuristic feel ... \n",
728
+ "197 Where can I find European cities rich in histo... \n",
729
+ "198 Which European cities offer glimpses into the ... \n",
730
+ "199 Affordable, safe European cities with historic... \n",
731
+ "\n",
732
+ " llama_query_v \\\n",
733
+ "0 \"Less crowded European cities to visit in Febr... \n",
734
+ "1 'medium budget European city breaks with parks... \n",
735
+ "2 \"Looking for a popular and affordable European... \n",
736
+ "3 \"European cities with low popularity, monaster... \n",
737
+ "4 \"European cities for a luxurious trip.\" \n",
738
+ ".. ... \n",
739
+ "195 \"European cities with medium popularity for ou... \n",
740
+ "196 'medium budget trip in April to a popular Euro... \n",
741
+ "197 \"Less crowded European destinations with water... \n",
742
+ "198 \"European cities with great walkability and lo... \n",
743
+ "199 'medium budget european city breaks in popular... \n",
744
+ "\n",
745
+ " llama_query_p0 \\\n",
746
+ "0 \"European cities with ice hockey facilities, l... \n",
747
+ "1 \"Medium budget European cities with parks and ... \n",
748
+ "2 \"Low-budget European cities with museums and n... \n",
749
+ "3 \"off the beaten path European city breaks in l... \n",
750
+ "4 \"European cities with horse riding trails and ... \n",
751
+ ".. ... \n",
752
+ "195 A junior Flash developer seeking guidance on c... \n",
753
+ "196 \"Medium budget trip to a popular European city... \n",
754
+ "197 \"Less crowded European destinations for a pric... \n",
755
+ "198 \"European cities with industrial heritage site... \n",
756
+ "199 \"Medium budget city breaks in Europe for a sch... \n",
757
+ "\n",
758
+ " llama_query_p1 \n",
759
+ "0 Based on the user's profile as a top-scoring p... \n",
760
+ "1 Based on the user's background as a former DJ ... \n",
761
+ "2 Based on the provided information, I'm going t... \n",
762
+ "3 Based on the given information, I'll create a ... \n",
763
+ "4 Based on the user's background and interests, ... \n",
764
+ ".. ... \n",
765
+ "195 Which European cities offer a mix of cultural ... \n",
766
+ "196 Based on the user's interest in Sins of a Sola... \n",
767
+ "197 Based on the provided information, I would gue... \n",
768
+ "198 Based on the user's interest in technological ... \n",
769
+ "199 Based on the information provided, I'll make a... \n",
770
+ "\n",
771
+ "[200 rows x 12 columns]"
772
+ ]
773
+ },
774
+ "execution_count": 9,
775
+ "metadata": {},
776
+ "output_type": "execute_result"
777
+ }
778
+ ],
779
+ "source": [
780
+ "merged = pd.merge(gemini, llama, on=common_cols, how=\"right\")\n",
781
+ "print(merged.shape)\n",
782
+ "merged"
783
+ ]
784
+ },
785
+ {
786
+ "cell_type": "code",
787
+ "execution_count": 11,
788
+ "id": "ed6dca09",
789
+ "metadata": {},
790
+ "outputs": [
791
+ {
792
+ "data": {
793
+ "text/plain": [
794
+ "config_id 0\n",
795
+ "persona_id 0\n",
796
+ "persona 0\n",
797
+ "filters 0\n",
798
+ "context 0\n",
799
+ "city 0\n",
800
+ "gemini_query_v 0\n",
801
+ "gemini_query_p0 0\n",
802
+ "gemini_query_p1 0\n",
803
+ "llama_query_v 0\n",
804
+ "llama_query_p0 0\n",
805
+ "llama_query_p1 0\n",
806
+ "dtype: int64"
807
+ ]
808
+ },
809
+ "execution_count": 11,
810
+ "metadata": {},
811
+ "output_type": "execute_result"
812
+ }
813
+ ],
814
+ "source": [
815
+ "merged.isna().sum()"
816
+ ]
817
+ },
818
+ {
819
+ "cell_type": "code",
820
+ "execution_count": 12,
821
+ "id": "9693f553",
822
+ "metadata": {},
823
+ "outputs": [],
824
+ "source": [
825
+ "merged.to_csv(\"../../data/user-evaluation/merged.csv\", index=False)"
826
+ ]
827
+ },
828
+ {
829
+ "cell_type": "code",
830
+ "execution_count": null,
831
+ "id": "93dba3ec",
832
+ "metadata": {},
833
+ "outputs": [],
834
+ "source": []
835
+ }
836
+ ],
837
+ "metadata": {
838
+ "kernelspec": {
839
+ "display_name": ".crs-venv",
840
+ "language": "python",
841
+ "name": ".crs-venv"
842
+ },
843
+ "language_info": {
844
+ "codemirror_mode": {
845
+ "name": "ipython",
846
+ "version": 3
847
+ },
848
+ "file_extension": ".py",
849
+ "mimetype": "text/x-python",
850
+ "name": "python",
851
+ "nbconvert_exporter": "python",
852
+ "pygments_lexer": "ipython3",
853
+ "version": "3.10.15"
854
+ }
855
+ },
856
+ "nbformat": 4,
857
+ "nbformat_minor": 5
858
+ }
views/questions_screen.py CHANGED
@@ -4,7 +4,7 @@ from datetime import datetime
4
  import os
5
  from dotenv import load_dotenv
6
  from views.nav_buttons import navigation_buttons
7
-
8
  load_dotenv()
9
 
10
 
@@ -80,11 +80,11 @@ def questions_screen(data):
80
  st.text_area("", config['context'], height=300, disabled=False)
81
 
82
  # Render queries and collect ratings
83
- query_v_ratings = render_query_ratings("Query_v", config, "query_v", current_index)
84
  query_p0_ratings = render_query_ratings("Query_p0",
85
- config, "query_p0", current_index, has_persona_alignment=True)
86
  query_p1_ratings = render_query_ratings("Query_p1",
87
- config, "query_p1",
88
  current_index, has_persona_alignment=True)
89
 
90
  # Additional comments
 
4
  import os
5
  from dotenv import load_dotenv
6
  from views.nav_buttons import navigation_buttons
7
+ st.set_page_config(layout="wide")
8
  load_dotenv()
9
 
10
 
 
80
  st.text_area("", config['context'], height=300, disabled=False)
81
 
82
  # Render queries and collect ratings
83
+ query_v_ratings = render_query_ratings("Query_v", config, "gemini_query_v", current_index)
84
  query_p0_ratings = render_query_ratings("Query_p0",
85
+ config, "gemini_query_p0", current_index, has_persona_alignment=True)
86
  query_p1_ratings = render_query_ratings("Query_p1",
87
+ config, "gemini_query_p1",
88
  current_index, has_persona_alignment=True)
89
 
90
  # Additional comments