Spaces:
Runtime error
Runtime error
File size: 6,772 Bytes
ac6c40f 13fd677 ac6c40f 0ea4f6d dd1054a 13fd677 ac6c40f 57616af ac6c40f 13fd677 0ea4f6d dd1054a 0ea4f6d 13fd677 0ea4f6d dd1054a 0ea4f6d ead9ac7 d822486 13fd677 ead9ac7 0ea4f6d dd1054a 13fd677 dd1054a 13fd677 ead9ac7 13fd677 dd1054a 13fd677 fcb0120 13fd677 fcb0120 13fd677 dd1054a 13fd677 dd1054a 13fd677 dd1054a 13fd677 dd1054a 13fd677 ac6c40f 57616af ac6c40f 13fd677 3578aa2 13fd677 969e2c4 13fd677 dd1054a 13fd677 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import streamlit as st
from .streamlit_utils import (
make_multiselect,
make_selectbox,
make_text_area,
make_text_input,
make_radio,
)
# Number of keys each sub-section of card_dict["considerations"] can hold.
# considerations_summary() compares len() of each sub-section dict against
# these totals, so they must match the keys written by considerations_page().
N_FIELDS_PII = 1
# The Licenses expander uses four keys: "dataset-restrictions",
# "dataset-restrictions-other", "data-copyright" and "data-copyright-other".
# The "-other" keys are set to "N/A" whenever `other` is not selected
# (otherwise presumably filled in by the follow-up text area), so they count
# toward the total just like the two multiselect keys.
N_FIELDS_LICENSES = 4
N_FIELDS_LIMITATIONS = 3
N_FIELDS = N_FIELDS_PII + N_FIELDS_LICENSES + N_FIELDS_LIMITATIONS
# Licensing statuses offered by both multiselects in the "Licenses" expander.
_LICENSE_STATUS_OPTIONS = (
    "public domain",
    "multiple licenses",
    "copyright - all rights reserved",
    "open license - commercial use allowed",
    "research use only",
    "non-commercial use only",
    "do not distribute",
    "other",
)


def _license_status_question(key_pref, field, label, help_text, other_label):
    """Ask one licensing-status multiselect plus its `other` follow-up.

    Args:
        key_pref: Key path prefix of the licenses sub-section.
        field: Name of the key the multiselect answers are stored under.
        label: Question shown above the multiselect.
        help_text: Help text attached to the multiselect.
        other_label: Question shown when `other` is among the selections.
    """
    make_multiselect(
        label=label,
        # A fresh list per call, so each widget gets its own options object.
        options=list(_LICENSE_STATUS_OPTIONS),
        key_list=key_pref + [field],
        help=help_text,
    )
    # Looked up after the multiselect call; presumably make_multiselect has
    # stored the current selection under `field` by now -- TODO confirm.
    licenses = st.session_state.card_dict["considerations"]["licenses"]
    other_field = field + "-other"
    if "other" in licenses.get(field, []):
        make_text_area(label=other_label, key_list=key_pref + [other_field])
    else:
        # No free-text elaboration applies; record a placeholder instead.
        licenses[other_field] = "N/A"


def considerations_page():
    """Render the "considerations" part of the data card form.

    Ensures ``st.session_state.card_dict["considerations"]`` and its
    ``"pii"``, ``"licenses"`` and ``"limitations"`` sub-dicts exist, then
    draws one collapsed expander per sub-section containing the input
    widgets built by the ``make_*`` helpers. Takes no arguments and returns
    nothing; results live in ``st.session_state.card_dict``.
    """
    st.session_state.card_dict.setdefault("considerations", {})

    with st.expander("PII Risks and Liability", expanded=False):
        key_pref = ["considerations", "pii"]
        st.session_state.card_dict["considerations"].setdefault("pii", {})
        make_text_area(
            label="Considering your answers to the PII part of the Data Curation Section, describe any potential privacy risks to the data subjects and creators when using the dataset.",
            key_list=key_pref + ["risks-description"],
            help="In terms for example of having models memorize private information of data subjects or other breaches of privacy.",
        )

    with st.expander("Licenses", expanded=False):
        key_pref = ["considerations", "licenses"]
        st.session_state.card_dict["considerations"].setdefault("licenses", {})
        _license_status_question(
            key_pref,
            "dataset-restrictions",
            label="Based on your answers in the Intended Use part of the Data Overview Section, which of the following best describe the copyright and licensing status of the dataset?",
            help_text="Does the license restrict how the dataset can be used?",
            other_label="You selected `other` for the dataset licensing status, please elaborate here:",
        )
        _license_status_question(
            key_pref,
            "data-copyright",
            label="Based on your answers in the Language part of the Data Curation Section, which of the following best describe the copyright and licensing status of the underlying language data?",
            help_text="For example if the dataset uses data from Wikipedia, we are asking about the status of Wikipedia text in general.",
            other_label="You selected `other` for the source data licensing status, please elaborate here:",
        )

    with st.expander("Known Technical Limitations", expanded=False):
        key_pref = ["considerations", "limitations"]
        st.session_state.card_dict["considerations"].setdefault("limitations", {})
        make_text_area(
            label=(
                "Describe any known technical limitations, such as spurious correlations, train/test overlap, annotation biases, or mis-annotations, "
                "and cite the works that first identified these limitations when possible."
            ),
            key_list=key_pref + ["data-technical-limitations"],
            help="Outline any properties of the dataset that might lead a trained model with good performance on the metric to not behave as expected.",
        )
        make_text_area(
            label=(
                "When using a model trained on this dataset in a setting where users or the public may interact with its predictions, what are some pitfalls to look out for? "
                "In particular, describe some applications of the general task featured in this dataset that its curation or properties make it less suitable for."
            ),
            key_list=key_pref + ["data-unsuited-applications"],
            help="For example, outline language varieties or domains that the model might underperform for.",
        )
        make_text_area(
            label=(
                "What are some discouraged use cases of a model trained to maximize the proposed metrics on this dataset? "
                "In particular, think about settings where decisions made by a model that performs reasonably well on the metric may still have strong negative consequences for users or members of the public."
            ),
            key_list=key_pref + ["data-discouraged-use"],
            help="For example, think about application settings where certain types of mistakes (such as missing a negation) might have a particularly strong negative impact but are not particularly singled out by the aggregated evaluation.",
        )
def considerations_summary():
    """Show a collapsed expander summarising completion of the considerations section.

    The count for each sub-section is the number of keys currently present in
    its dict under ``st.session_state.card_dict["considerations"]``; the
    overall count is the sum over every sub-section dict found there.
    """
    section = st.session_state.card_dict.get("considerations", {})
    filled_overall = sum(len(entries) for entries in section.values())

    # (heading, key under "considerations", expected number of fields)
    subsections = (
        ("PII Risks and Liability", "pii", N_FIELDS_PII),
        ("Licenses", "licenses", N_FIELDS_LICENSES),
        ("Known Technical Limitations", "limitations", N_FIELDS_LIMITATIONS),
    )

    title = f"Considerations for Using Data Completion - {filled_overall} of {N_FIELDS}"
    with st.expander(title, expanded=False):
        parts = [
            f"- **Overall completion:**\n - {filled_overall} of {N_FIELDS} fields\n"
        ]
        for heading, key, expected in subsections:
            filled = len(section.get(key, {}))
            parts.append(
                f"- **Sub-section - {heading}:**\n - {filled} of {expected} fields\n"
            )
        st.markdown("".join(parts))
|