Spaces:

GEM
/

DatasetCardForm

Runtime error

App Files Files Community

Yacine Jernite committed on Nov 16, 2021

Commit

cc73923

1 Parent(s): 969e2c4

completed_v1

Browse files

Files changed (2) hide show

datacards/context.py +17 -21
datacards/curation.py +6 -1

datacards/context.py CHANGED Viewed

@@ -73,29 +73,25 @@ def context_page():
         st.session_state.card_dict["context"]["biases"] = st.session_state.card_dict[
             "context"
         ].get("biases", {})
-        make_text_area(
-            label="Are there documented biases in the data?",
-            key_list=key_pref + [""],
-            help="",
-        )
-        make_text_area(
-            label="Link to analyses",
-            key_list=key_pref + [""],
-            help="",
-        )
-        make_text_area(
-            label="How does the distribution of language producers differ from a base distribution?",
-            key_list=key_pref + [""],
-            help="",
         )
         make_text_area(
-            label="Topic coverage?",
-            key_list=key_pref + [""],
-            help="",
         )

         st.session_state.card_dict["context"]["biases"] = st.session_state.card_dict[
             "context"
         ].get("biases", {})
+        make_radio(
+            label="Are there documented social biases in the dataset? " + \
+                "Biases in this context are variations in the ways members of different social categories are represented that can have harmful downstream consequences for members of the more disadvantaged group.",
+            options=["yes", "unsure", "no"],
+            key_list=key_pref + ["has-biases"],
+            help="For a more extensive definition of social biases, see [Language (Technology) is Power: A Critical Survey of “Bias” in NLP ](https://aclanthology.org/2020.acl-main.485.pdf)",
         )
+        if st.session_state.card_dict["context"]["biases"]["has-biases"] == "yes":
+            make_text_area(
+                label="Provide links to and summaries of works analyzing these biases.",
+                key_list=key_pref + ["bias-analyses"],
+                help="The analyses can take the form of academic papers or news articles, or even blog posts.",
+            )
+        else:
+            st.session_state.card_dict["context"]["biases"]["bias-analyses"] = "N/A"
         make_text_area(
+            label="Does the distribution of language producers in the dataset accurately represent the full distribution of speakers of the language world-wide? If not, how does it differ?",
+            key_list=key_pref + ["speaker-distibution"],
+            help="For example, are most speakers in the dataset of a certain gender or located in a certain county?",
         )

datacards/curation.py CHANGED Viewed

@@ -11,7 +11,7 @@ from .streamlit_utils import (
 )
 N_FIELDS_ORIGINAL = 4
-N_FIELDS_LANGUAGE = 10
 N_FIELDS_ANNOTATIONS = 10
 N_FIELDS_CONSENT = 4
 N_FIELDS_PII = 7
@@ -120,6 +120,11 @@ def curation_page():
             key_list=key_pref + ["producers-description"],
             help="Provide a description of the context in which the language was produced and who produced it.",
         )
         make_selectbox(
             label="Was the text validated by a different worker or a data curator?",
             options=[

 )
 N_FIELDS_ORIGINAL = 4
+N_FIELDS_LANGUAGE = 11
 N_FIELDS_ANNOTATIONS = 10
 N_FIELDS_CONSENT = 4
 N_FIELDS_PII = 7
             key_list=key_pref + ["producers-description"],
             help="Provide a description of the context in which the language was produced and who produced it.",
         )
+        make_text_area(
+            label="Does the language in the dataset focus on specific topics? How would you describe them?",
+            key_list=key_pref + ["topics"],
+            help="for example, tourism, entertainment, etc.",
+        )
         make_selectbox(
             label="Was the text validated by a different worker or a data curator?",
             options=[