Ashmi Banerjee committed
Commit 48fa8cf · 1 Parent(s): 4e9003c

removed context, cities, replaced relevance with groundedness

static/instructions.html ADDED
@@ -0,0 +1,41 @@
+ <p style='font-size:large;'>
+ You will be <mark>given a user profile and a travel-related query</mark>.
+ Your task is to <mark>evaluate the generated queries (numbered 1-6)</mark> based on the
+ following criteria:</p>
+ <p><strong><mark>Groundedness</mark>:</strong>
+ Evaluate how well the query incorporates the given filters.
+ <br> Select one of the following options:
+ <ol style="padding-left:2rem;">
+ <li><b>Not Grounded</b> - None of the filters are present in the query.</li> <li><b>Partially Grounded</b> -
+ Some filters are present, but not all. </li>
+ <li><b>Fully Grounded</b> - All provided filters are accurately reflected in the query.
+ </li>
+ <li><b>Unclear</b> - It is difficult to determine whether the filters are included.</li>
+ </ol>
+ </p>
+ <p><strong><mark>Clarity Assessment</mark>:</strong>
+ Evaluate how clear and understandable the query is.
+ Consider whether it is grammatically correct and easy to interpret.
+ <br>Your options are:
+ <ol style="padding-left:2rem;">
+ <li><b>Not Clear</b> - The query is difficult to understand or contains
+ significant grammatical errors.</li>
+ <li><b>Somewhat Clear</b> - The query is understandable but may have
+ minor grammatical issues or slight ambiguity.</li>
+ <li><b>Very Clear</b> - The query is well-formed,
+ grammatically correct, and easy to understand.</li>
+ </ol>
+ </p>
+ <p><strong><mark>Persona Alignment</mark>:</strong>
+ How likely is the query to match the persona and reflect a question they would ask about travel?
+ <br>Your options are:
+ <ol style='padding-left:2rem;'>
+ <li><b>Not Aligned</b> - The user is not likely at all to ask this query.</li>
+ <li><b>Partially Aligned</b> - The user is quite likely to ask this query.</li>
+ <li><b>Aligned</b> - The user is very likely to ask this query.</li>
+ <li><b>Unclear</b> - It is unclear whether the user will ask this query.</li> </ol>
+ </p>
+ <p><strong><mark>Additional Comments (Optional)</mark>:</strong>
+ If you have any feedback, remarks, or interesting observations about the data, you can leave them here.
+ This is completely optional.
+ </p>
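
For orientation, the groundedness scale above is what the rating widget stores as an integer index into a label list. The minimal sketch below shows that mapping; GROUNDEDNESS_LABELS mirrors the label list used in views/questions_screen.py, while the groundedness_label helper is purely illustrative and not part of the repository.

# Illustrative only: map a stored integer rating back to its groundedness label.
GROUNDEDNESS_LABELS = ["N/A", "Not Grounded", "Partially Grounded", "Grounded", "Unclear"]

def groundedness_label(rating: int) -> str:
    # 0 means unrated / N/A; 1-4 follow the rubric order above.
    return GROUNDEDNESS_LABELS[rating]

print(groundedness_label(2))  # -> "Partially Grounded"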
utils/loaders.py CHANGED
@@ -9,6 +9,7 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 REPO_NAME = os.getenv("DATA_REPO")
 DATA_FILES = os.getenv("GEMINI_DATA_FILES")
 
+
 @st.cache_data
 def load_data():
     try:
@@ -22,3 +23,8 @@ def load_data():
         dataset.set_format(type='pandas') ## converting it into pandas
         df = dataset["train"][:]
         return df[:5]
+
+
+def load_html(file_name):
+    with open(file_name, 'r') as file:
+        return file.read()
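
As a quick usage reference, the sketch below mirrors how the new load_html helper is consumed in views/questions_screen.py further down; it assumes it runs inside a Streamlit app with static/instructions.html present.

import streamlit as st
from utils.loaders import load_html

# Load the shared instructions markup once and render it inside a collapsible panel.
instructions_html = load_html("static/instructions.html")
with st.expander("Instructions", expanded=False):
    st.html(instructions_html)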
views/questions_screen.py CHANGED
@@ -4,7 +4,7 @@ from datetime import datetime
 from dotenv import load_dotenv
 from views.nav_buttons import navigation_buttons
 import random
-
+from utils.loaders import load_html
 load_dotenv()
 
 
@@ -106,7 +106,7 @@ def render_query_ratings(
 ):
     """Helper function to render ratings for a given query."""
     stored_query_ratings = get_previous_ratings(model_name, query_key, current_index)
-    stored_relevance = stored_query_ratings.get("relevance", 0)
+    stored_groundedness = stored_query_ratings.get("groundedness", 0)
     stored_clarity = stored_query_ratings.get("clarity", 0)
     stored_persona_alignment = (
         stored_query_ratings.get("persona_alignment", 0) if has_persona_alignment else 0
@@ -145,14 +145,14 @@
         cols[0],
     )
 
-    relevance_rating = render_single_rating(
-        "Relevance:",
+    groundedness_rating = render_single_rating(
+        "Groundedness:",
         options,
-        lambda x: ["N/A", "Not Relevant", "Somewhat Relevant", "Relevant", "Unclear"][
+        lambda x: ["N/A", "Not Grounded", "Partially Grounded", "Grounded", "Unclear"][
            x
        ],
-        f"rating_{model_name}{query_key}_relevance_",
-        stored_relevance,
+        f"rating_{model_name}{query_key}_groundedness_",
+        stored_groundedness,
         cols[1],
     )
 
@@ -167,7 +167,7 @@
 
     return {
         "clarity": clarity_rating,
-        "relevance": relevance_rating,
+        "groundedness": groundedness_rating,
         "persona_alignment": persona_alignment_rating if has_persona_alignment else None,
     }
 
@@ -224,41 +224,19 @@ def questions_screen(data):
     st.write(f"Question {current_index + 1} of {len(data)}")
     # st.subheader(f"Config ID: {config['config_id']}")
     st.markdown("### Instructions")
+    instructions_html = load_html("static/instructions.html")
     with st.expander("Instructions", expanded=False):
-        st.html('''<p style='font-size:large;'>You will be <mark>given a user profile and a travel-related
-        query</mark>. Your task is to <mark>evaluate the generated queries (numbered 1-6)</mark> based on the
-        following criteria:</p> <p><strong><mark>Relevance</mark>:</strong> Evaluate how well the query aligns
-        with the given cities, filters, and displayed context. Consider whether the query description matches the
-        cities and context provided (click on <em><strong>Full Context</strong></em> to expand). <br> Select one
-        of the following options: <ol style="padding-left:2rem;"> <li><b>Not Relevant</b> - The query has no
-        connection to the cities, filters, or displayed context.</li> <li><b>Somewhat Relevant</b> - The query is
-        partially related but does not fully match the cities or context.</li> <li><b>Relevant</b> - The query
-        clearly aligns with the cities, filters, and displayed context.</li> <li><b>Unclear</b> - The relevance
-        of the query is difficult to determine based on the given information.</li> </ol> </p>
-        <p><strong><mark>Clarity Assessment</mark>:</strong> Evaluate how clear and understandable the query is.
-        Consider whether it is grammatically correct and easy to interpret. <br>Your options are: <ol
-        style="padding-left:2rem;"> <li><b>Not Clear</b> - The query is difficult to understand or contains
-        significant grammatical errors.</li> <li><b>Somewhat Clear</b> - The query is understandable but may have
-        minor grammatical issues or slight ambiguity.</li> <li><b>Very Clear</b> - The query is well-formed,
-        grammatically correct, and easy to understand.</li> </ol> </p> <p> <strong><mark>Persona
-        Alignment</mark>:</strong> How likely is the query to match the persona and reflect a question they would
-        ask about travel? <br>Your options are: <ol style='padding-left:2rem;'> <li><b>Not Aligned</b> - The user
-        is not likely at all to ask this query.</li> <li><b>Partially Aligned</b> - The user is quite likely to
-        ask this query.</li> <li><b>Aligned</b> - The user is very likely to ask this query. </li>
-        <li><b>Unclear</b> - It is unclear whether the user will ask this query.</li> </ol> </p> <p>
-        <strong><mark>Additional Comments (Optional)</mark>:</strong> If you have any feedback, remarks,
-        or interesting observations about the data, you can leave them here. This is completely optional. </p>
-
-        ''')
+        st.html(instructions_html)
+
     # Context information
     st.markdown("### Context Information")
     with st.expander("Persona", expanded=True):
         st.write(config["persona"])
-    with st.expander("Filters & Cities", expanded=True):
-        st.write("**Filters:**", config["filters"])
-        st.write("**Cities:**", config["city"])
-    with st.expander("Full Context", expanded=False):
-        st.text_area("", config["context"], height=300, disabled=False)
+    with st.expander("Filters", expanded=True):
+        st.code(config["filters"], language="json")
+    # st.write("**Cities:**", config["city"])
+    # with st.expander("Full Context", expanded=False):
+    #     st.text_area("", config["context"], height=300, disabled=False)
 
     g_ratings = display_ratings_row("gemini", config, current_index)
     l_ratings = display_ratings_row("llama", config, current_index)
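
Because the filters are now shown with st.code(..., language="json"), here is a standalone sketch of that panel; the filters payload is hypothetical, and json.dumps is used only because st.code expects a string (the diff does not show the actual type of config["filters"]).

import json
import streamlit as st

# Hypothetical filters payload; real values come from the evaluation dataset.
filters = {"budget": "medium", "interests": ["hiking", "museums"], "month": "July"}

with st.expander("Filters", expanded=True):
    # Render the filters as formatted JSON, as the updated questions_screen.py does.
    st.code(json.dumps(filters, indent=2), language="json")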