Ashmi Banerjee committed
Commit 48fa8cf · 1 Parent(s): 4e9003c

removed context, cities, replaced relevance with groundedness

static/instructions.html ADDED
@@ -0,0 +1,41 @@
+ <p style='font-size:large;'>
+ You will be <mark>given a user profile and a travel-related query</mark>.
+ Your task is to <mark>evaluate the generated queries (numbered 1-6)</mark> based on the
+ following criteria:</p>
+ <p><strong><mark>Groundedness</mark>:</strong>
+ Evaluate how well the query incorporates the given filters.
+ <br> Select one of the following options:
+ <ol style="padding-left:2rem;">
+ <li><b>Not Grounded</b> - None of the filters are present in the query.</li> <li><b>Partially Grounded</b> -
+ Some filters are present, but not all. </li>
+ <li><b>Fully Grounded</b> - All provided filters are accurately reflected in the query.
+ </li>
+ <li><b>Unclear</b> - It is difficult to determine whether the filters are included.</li>
+ </ol>
+ </p>
+ <p><strong><mark>Clarity Assessment</mark>:</strong>
+ Evaluate how clear and understandable the query is.
+ Consider whether it is grammatically correct and easy to interpret.
+ <br>Your options are:
+ <ol style="padding-left:2rem;">
+ <li><b>Not Clear</b> - The query is difficult to understand or contains
+ significant grammatical errors.</li>
+ <li><b>Somewhat Clear</b> - The query is understandable but may have
+ minor grammatical issues or slight ambiguity.</li>
+ <li><b>Very Clear</b> - The query is well-formed,
+ grammatically correct, and easy to understand.</li>
+ </ol>
+ </p>
+ <p><strong><mark>Persona Alignment</mark>:</strong>
+ How likely is the query to match the persona and reflect a question they would ask about travel?
+ <br>Your options are:
+ <ol style='padding-left:2rem;'>
+ <li><b>Not Aligned</b> - The user is not likely at all to ask this query.</li>
+ <li><b>Partially Aligned</b> - The user is quite likely to ask this query.</li>
+ <li><b>Aligned</b> - The user is very likely to ask this query.</li>
+ <li><b>Unclear</b> - It is unclear whether the user will ask this query.</li> </ol>
+ </p>
+ <p><strong><mark>Additional Comments (Optional)</mark>:</strong>
+ If you have any feedback, remarks, or interesting observations about the data, you can leave them here.
+ This is completely optional.
+ </p>
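
For orientation, the groundedness scale above is what the rating widget stores as an integer index into a label list. The minimal sketch below shows that mapping; GROUNDEDNESS_LABELS mirrors the label list used in views/questions_screen.py, while the groundedness_label helper is purely illustrative and not part of the repository.

# Illustrative only: map a stored integer rating back to its groundedness label.
GROUNDEDNESS_LABELS = ["N/A", "Not Grounded", "Partially Grounded", "Grounded", "Unclear"]

def groundedness_label(rating: int) -> str:
    # 0 means unrated / N/A; 1-4 follow the rubric order above.
    return GROUNDEDNESS_LABELS[rating]

print(groundedness_label(2))  # -> "Partially Grounded"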
utils/loaders.py CHANGED
@@ -9,6 +9,7 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 REPO_NAME = os.getenv("DATA_REPO")
 DATA_FILES = os.getenv("GEMINI_DATA_FILES")
 
+
 @st.cache_data
 def load_data():
     try:
@@ -22,3 +23,8 @@ def load_data():
         dataset.set_format(type='pandas') ## converting it into pandas
         df = dataset["train"][:]
         return df[:5]
+
+
+def load_html(file_name):
+    with open(file_name, 'r') as file:
+        return file.read()
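
As a quick usage reference, the sketch below mirrors how the new load_html helper is consumed in views/questions_screen.py further down; it assumes it runs inside a Streamlit app with static/instructions.html present.

import streamlit as st
from utils.loaders import load_html

# Load the shared instructions markup once and render it inside a collapsible panel.
instructions_html = load_html("static/instructions.html")
with st.expander("Instructions", expanded=False):
    st.html(instructions_html)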
views/questions_screen.py CHANGED
@@ -4,7 +4,7 @@ from datetime import datetime
 from dotenv import load_dotenv
 from views.nav_buttons import navigation_buttons
 import random
-
+from utils.loaders import load_html
 load_dotenv()
 
 
@@ -106,7 +106,7 @@ def render_query_ratings(
 ):
     """Helper function to render ratings for a given query."""
     stored_query_ratings = get_previous_ratings(model_name, query_key, current_index)
-    stored_relevance = stored_query_ratings.get("relevance", 0)
+    stored_groundedness = stored_query_ratings.get("groundedness", 0)
     stored_clarity = stored_query_ratings.get("clarity", 0)
     stored_persona_alignment = (
         stored_query_ratings.get("persona_alignment", 0) if has_persona_alignment else 0
@@ -145,14 +145,14 @@
         cols[0],
     )
 
-    relevance_rating = render_single_rating(
-        "Relevance:",
+    groundedness_rating = render_single_rating(
+        "Groundedness:",
         options,
-        lambda x: ["N/A", "Not Relevant", "Somewhat Relevant", "Relevant", "Unclear"][
+        lambda x: ["N/A", "Not Grounded", "Partially Grounded", "Grounded", "Unclear"][
            x
        ],
-        f"rating_{model_name}{query_key}_relevance_",
-        stored_relevance,
+        f"rating_{model_name}{query_key}_groundedness_",
+        stored_groundedness,
         cols[1],
     )
 
@@ -167,7 +167,7 @@
 
     return {
         "clarity": clarity_rating,
-        "relevance": relevance_rating,
+        "groundedness": groundedness_rating,
         "persona_alignment": persona_alignment_rating if has_persona_alignment else None,
     }
 
@@ -224,41 +224,19 @@ def questions_screen(data):
     st.write(f"Question {current_index + 1} of {len(data)}")
     # st.subheader(f"Config ID: {config['config_id']}")
     st.markdown("### Instructions")
+    instructions_html = load_html("static/instructions.html")
     with st.expander("Instructions", expanded=False):
-        st.html('''<p style='font-size:large;'>You will be <mark>given a user profile and a travel-related
-        query</mark>. Your task is to <mark>evaluate the generated queries (numbered 1-6)</mark> based on the
-        following criteria:</p> <p><strong><mark>Relevance</mark>:</strong> Evaluate how well the query aligns
-        with the given cities, filters, and displayed context. Consider whether the query description matches the
-        cities and context provided (click on <em><strong>Full Context</strong></em> to expand). <br> Select one
-        of the following options: <ol style="padding-left:2rem;"> <li><b>Not Relevant</b> - The query has no
-        connection to the cities, filters, or displayed context.</li> <li><b>Somewhat Relevant</b> - The query is
-        partially related but does not fully match the cities or context.</li> <li><b>Relevant</b> - The query
-        clearly aligns with the cities, filters, and displayed context.</li> <li><b>Unclear</b> - The relevance
-        of the query is difficult to determine based on the given information.</li> </ol> </p>
-        <p><strong><mark>Clarity Assessment</mark>:</strong> Evaluate how clear and understandable the query is.
-        Consider whether it is grammatically correct and easy to interpret. <br>Your options are: <ol
-        style="padding-left:2rem;"> <li><b>Not Clear</b> - The query is difficult to understand or contains
-        significant grammatical errors.</li> <li><b>Somewhat Clear</b> - The query is understandable but may have
-        minor grammatical issues or slight ambiguity.</li> <li><b>Very Clear</b> - The query is well-formed,
-        grammatically correct, and easy to understand.</li> </ol> </p> <p> <strong><mark>Persona
-        Alignment</mark>:</strong> How likely is the query to match the persona and reflect a question they would
-        ask about travel? <br>Your options are: <ol style='padding-left:2rem;'> <li><b>Not Aligned</b> - The user
-        is not likely at all to ask this query.</li> <li><b>Partially Aligned</b> - The user is quite likely to
-        ask this query.</li> <li><b>Aligned</b> - The user is very likely to ask this query. </li>
-        <li><b>Unclear</b> - It is unclear whether the user will ask this query.</li> </ol> </p> <p>
-        <strong><mark>Additional Comments (Optional)</mark>:</strong> If you have any feedback, remarks,
-        or interesting observations about the data, you can leave them here. This is completely optional. </p>
-
-        ''')
+        st.html(instructions_html)
+
     # Context information
     st.markdown("### Context Information")
     with st.expander("Persona", expanded=True):
         st.write(config["persona"])
-    with st.expander("Filters & Cities", expanded=True):
-        st.write("**Filters:**", config["filters"])
-        st.write("**Cities:**", config["city"])
-    with st.expander("Full Context", expanded=False):
-        st.text_area("", config["context"], height=300, disabled=False)
+    with st.expander("Filters", expanded=True):
+        st.code(config["filters"], language="json")
+    # st.write("**Cities:**", config["city"])
+    # with st.expander("Full Context", expanded=False):
+    #     st.text_area("", config["context"], height=300, disabled=False)
 
     g_ratings = display_ratings_row("gemini", config, current_index)
     l_ratings = display_ratings_row("llama", config, current_index)
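
Because the filters are now shown with st.code(..., language="json"), here is a standalone sketch of that panel; the filters payload is hypothetical, and json.dumps is used only because st.code expects a string (the diff does not show the actual type of config["filters"]).

import json
import streamlit as st

# Hypothetical filters payload; real values come from the evaluation dataset.
filters = {"budget": "medium", "interests": ["hiking", "museums"], "month": "July"}

with st.expander("Filters", expanded=True):
    # Render the filters as formatted JSON, as the updated questions_screen.py does.
    st.code(json.dumps(filters, indent=2), language="json")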