import streamlit as st

from .streamlit_utils import make_text_input

from .streamlit_utils import (
    make_multiselect,
    make_selectbox,
    make_text_area,
    make_text_input,
    make_radio,
)

# Number of input fields in each sub-section of the curation page.
# Sub-sections that do not render any widget yet are counted as 0.
N_FIELDS_ORIGINAL = 4
N_FIELDS_LANGUAGE = 12
N_FIELDS_ANNOTATIONS = 0
N_FIELDS_CONSENT = 0
N_FIELDS_PII = 0
N_FIELDS_MAINTENANCE = 0
N_FIELDS_GEM = 0

# Total number of fields across every sub-section of the curation page.
N_FIELDS = sum(
    (
        N_FIELDS_ORIGINAL,
        N_FIELDS_LANGUAGE,
        N_FIELDS_ANNOTATIONS,
        N_FIELDS_CONSENT,
        N_FIELDS_PII,
        N_FIELDS_MAINTENANCE,
        N_FIELDS_GEM,
    )
)


"""
What were the selection criteria? [Describe the process for selecting instances to include in the dataset, including any tools used.]
"""


def _init_curation_subsection(name):
    """Ensure ``card_dict["curation"][name]`` exists and return its key prefix.

    Answers already stored for the sub-section are left untouched; a missing
    ``"curation"`` entry or sub-section is created as an empty dict.

    Args:
        name: key of the sub-section inside ``card_dict["curation"]``.

    Returns:
        The list ``["curation", name]``, which callers extend with a field
        name to build the ``key_list`` passed to the ``make_*`` widgets.
    """
    curation = st.session_state.card_dict.setdefault("curation", {})
    curation.setdefault(name, {})
    return ["curation", name]


def curation_page():
    """Render the curation section of the form, one expander per sub-section.

    Every sub-section dict under ``st.session_state.card_dict["curation"]`` is
    created if missing. Only "Original Curation" and "Language Data" render
    widgets; the remaining expanders are empty apart from that initialisation.
    """
    st.session_state.card_dict.setdefault("curation", {})
    with st.expander("Original Curation", expanded=False):
        key_pref = _init_curation_subsection("original")
        make_text_area(
            label="Original curation rationale",
            key_list=key_pref + ["rationale"],
            help="Describe the curation rationale behind the original dataset(s).",
        )
        make_text_area(
            label="What was the communicative goal?",
            key_list=key_pref + ["communicative"],
            help="Describe the communicative goal that the original dataset(s) was trying to represent.",
        )
        make_radio(
            label="Is the dataset aggregated from different data sources?",
            options=["no", "yes"],
            key_list=key_pref + ["is-aggregated"],
            help="e.g. Wikipedia, movie dialogues, etc.",
        )
        make_text_area(
            label="If yes, list the sources",
            key_list=key_pref + ["aggregated-sources"],
            help="Otherwise, type N/A",
        )
    with st.expander("Language Data", expanded=False):
        key_pref = _init_curation_subsection("language")
        make_multiselect(
            label="How was the language data obtained?",
            options=[
                "Found",
                "Created for the dataset",
                "Crowdsourced",
                "Machine-generated",
                "Other",
            ],
            key_list=key_pref + ["obtained"],
        )
        make_multiselect(
            label="If found, where from?",
            options=[
                "Multiple websites",
                "Single website",
                "Offline media collection",
                "Other",
                "N/A",
            ],
            key_list=key_pref + ["found"],
            help="select N/A if none of the language data was found",
        )
        make_multiselect(
            label="If crowdsourced, where from?",
            options=[
                "Amazon Mechanical Turk",
                "Other crowdworker platform",
                "Participatory experiment",
                "Other",
                "N/A",
            ],
            key_list=key_pref + ["crowdsourced"],
            help="select N/A if none of the language data was crowdsourced",
        )
        make_text_area(
            label="If created for the dataset, describe the creation process.",
            key_list=key_pref + ["created"],
        )
        make_text_area(
            label="What further information do we have on the language producers?",
            key_list=key_pref + ["producers-description"],
            help="Provide a description of the context in which the language was produced and who produced it.",
        )
        make_text_input(
            label="If text was machine-generated for the dataset, provide a link to the generation method if available (N/A otherwise).",
            key_list=key_pref + ["machine-generated"],
            help="if the generation code is unavailable, enter N/A",
        )
        make_selectbox(
            label="Was the text validated by a different worker or a data curator?",
            options=[
                "not validated",
                "validated by crowdworker",
                "validated by data curator",
                "other",
            ],
            key_list=key_pref + ["validated"],
            help="this question is about human or human-in-the-loop validation only",
        )
        make_multiselect(
            label="In what kind of organization did the curation happen?",
            options=["industry", "academic", "independent", "other"],
            key_list=key_pref + ["organization-type"],
        )
        make_text_input(
            label="Name the organization(s).",
            key_list=key_pref + ["organization-names"],
            help="comma-separated",
        )
        make_text_area(
            label="How was the text data pre-processed? (Enter N/A if the text was not pre-processed)",
            key_list=key_pref + ["pre-processed"],
            help="List the steps in preprocessing the data for the dataset. Enter N/A if no steps were taken.",
        )
        make_selectbox(
            label="Were text instances selected or filtered?",
            options=["not filtered", "manually", "algorithmically", "hybrid"],
            key_list=key_pref + ["is-filtered"],
        )
        make_text_area(
            label="What were the selection criteria?",
            key_list=key_pref + ["filtered-criteria"],
            help="Describe the process for selecting instances to include in the dataset, including any tools used. If no selection was done, enter N/A.",
        )
    # The sub-sections below have no widgets yet: their expanders are still
    # shown and their dicts are still created in card_dict["curation"].
    with st.expander("Structured Annotations", expanded=False):
        _init_curation_subsection("annotations")
    with st.expander("Consent", expanded=False):
        _init_curation_subsection("consent")
    with st.expander("Private Identifying Information (PII)", expanded=False):
        _init_curation_subsection("pii")
    with st.expander("Maintenance", expanded=False):
        _init_curation_subsection("maintenance")
    with st.expander("GEM Additional Curation", expanded=False):
        _init_curation_subsection("gem")


def curation_summary():
    """Render a collapsed expander summarising curation completion.

    The count for a sub-section is the number of keys currently stored in its
    dict under ``st.session_state.card_dict["curation"]``; a missing
    sub-section counts as 0. The overall count sums the sizes of every dict
    stored under ``"curation"``. Counts are shown against the ``N_FIELDS*``
    constants, both in the expander title and in a markdown bullet list.
    """
    curation = st.session_state.card_dict.get("curation", {})
    total_filled = sum(len(dct) for dct in curation.values())
    # (displayed title, key in card_dict["curation"], expected number of fields)
    sub_sections = (
        ("Original Curation", "original", N_FIELDS_ORIGINAL),
        ("Language Data", "language", N_FIELDS_LANGUAGE),
        ("Structured Annotations", "annotations", N_FIELDS_ANNOTATIONS),
        ("Consent", "consent", N_FIELDS_CONSENT),
        ("PII", "pii", N_FIELDS_PII),
        ("Maintenance", "maintenance", N_FIELDS_MAINTENANCE),
        ("GEM Curation", "gem", N_FIELDS_GEM),
    )
    with st.expander(
        f"Dataset Curation Completion - {total_filled} of {N_FIELDS}", expanded=False
    ):
        lines = [
            f"- **Overall completion:**\n  - {total_filled} of {N_FIELDS} fields\n"
        ]
        lines.extend(
            f"- **Sub-section - {title}:**\n"
            f"  - {len(curation.get(key, {}))} of {n_fields} fields\n"
            for title, key, n_fields in sub_sections
        )
        st.markdown("".join(lines))