stats / app.py
Yara Kyrychenko
upd scale
426184d
import streamlit as st
import pandas as pd
import altair as alt
# Set the page title and icon for the app.
st.set_page_config(page_title="MIST Stats", page_icon="🤔")
# Load the MIST results table that every chart below is built from.
df = pd.read_csv("mist_stats.csv")

st.header("Misinformation Susceptibility Test Statistics")
st.subheader("Explore misinformation susceptibility profiles across 24 countries!")
# The link target carries an explicit https:// scheme: a bare host name in a
# Markdown link is treated as a relative path and would resolve under this
# app's own URL instead of opening the MIST site.
st.markdown(
    "Data from [yourmist.streamlit.app](https://yourmist.streamlit.app) "
    "between June 19, 2023, and July 10, 2024, from individuals who completed "
    "the MIST and chose to share their score and all sociodemographic data "
    "with the researchers. Only countries with more than 250 complete "
    "submissions at the time of data collection are included."
)

# Country picker: the pooled option first, then every country in the data,
# alphabetically.
countries = ["All countries", *sorted(df["Country"].unique())]
selected_country = st.selectbox("Select a country (or All countries):", countries)

# Grouping variable picker; each entry is a column name in df and a key in
# ordering_dict.
categorical_vars = [
    "Generation",
    "Education",
    "Political Leaning",
    "Gender",
    "Perceived Misinfo Discernment Ability",
]
selected_var = st.selectbox("Select a variable to visualize:", categorical_vars)
# Display order of the levels for each selectable variable. The lists are used
# as the category order of the pandas Categorical, as the sort order of the bar
# chart's y axis, and as the domain of the colour scale.
ordering_dict = {
    "Gender": [
        "Male",
        "Female",
        "Non-binary/Third",
    ],
    "Generation": [
        "Generation Z",
        "Millennials",
        "Generation X",
        "Baby Boomers",
    ],
    "Education": [
        "High School or Less",
        "Some University but no degree",
        "University Bachelors Degree",
        "Graduate or professional degree (e.g., MA, PhD, MD)",
    ],
    "Political Leaning": [
        "Extremely liberal",
        "Liberal",
        "Slightly liberal",
        "Moderate",
        "Slightly conservative",
        "Conservative",
        "Extremely conservative",
    ],
    "Perceived Misinfo Discernment Ability": [
        "Very poor",
        "Poor",
        "Average",
        "Good",
        "Very good",
    ],
}
# Turn the selected column into an ordered categorical so grouping and sorting
# follow the display order in ordering_dict instead of alphabetical order.
# Values not listed in ordering_dict become NaN.
df[selected_var] = pd.Categorical(
    df[selected_var],
    categories=ordering_dict[selected_var],
    ordered=True,
)

# "All countries" keeps every row; any other choice keeps only that country.
if selected_country == "All countries":
    filtered_df = df
else:
    filtered_df = df[df["Country"] == selected_country]

# Summary statistics reported in the chart subtitles.
num_obs = len(filtered_df)
mean_score = filtered_df["Score"].mean().round(2)
std_dev = filtered_df["Score"].std().round(2)

# Mean score per level of the selected variable. observed=False is passed
# explicitly: grouping on a categorical without it relies on a pandas default
# that is deprecated and changes meaning between versions. Pinning it keeps
# levels with no respondents as rows (with a NaN mean) on every pandas version.
plot_df = (
    filtered_df.groupby(selected_var, observed=False)
    .agg(avg_score=("Score", "mean"))
    .reset_index()
)
plot_df["avg_score"] = plot_df["avg_score"].round(2)
plot_df = plot_df.sort_values(by=selected_var)

# Two subtitle lines: what is being shown, then the sample summary.
subtitle_text = [
    f"In {selected_country} by {selected_var.capitalize()}",
    f"N={num_obs}, Mean={mean_score}, SD={std_dev}",
]
# Colour mapping over the levels of the selected variable, in display order.
# The histogram further down reuses this scale so both charts share colours.
color_scale = alt.Scale(domain=ordering_dict[selected_var], scheme="viridis")

# Encodings for the horizontal bar chart of average score per level.
bar_x = alt.X(
    "avg_score",
    title="Average MIST Score",
    scale=alt.Scale(domain=[0, 20]),
)
bar_y = alt.Y(
    selected_var,
    title=selected_var,
    sort=ordering_dict[selected_var],
)
# The y axis already names each level, so the colour legend is turned off.
bar_color = alt.Color(selected_var, scale=color_scale, legend=None)

bar_title = {
    "text": "Average MIST Score",
    "subtitle": subtitle_text,
    "subtitleFontSize": 16,
    "anchor": "start",
    "limit": 1000,
}

chart = (
    alt.Chart(plot_df)
    .mark_bar()
    .encode(x=bar_x, y=bar_y, color=bar_color)
    .properties(width=800, height=600, title=bar_title)
)
st.altair_chart(chart)
# Re-apply the ordered categorical and sort rows by level. assign() returns a
# new frame, so nothing is written into filtered_df itself; when a country is
# selected, filtered_df is a boolean-mask slice of df and assigning a column on
# it in place raises pandas' SettingWithCopyWarning.
filtered_df = filtered_df.assign(
    **{
        selected_var: pd.Categorical(
            filtered_df[selected_var],
            categories=ordering_dict[selected_var],
            ordered=True,
        )
    }
).sort_values(by=selected_var)

# One list drives both the axis sort and the scale domain so the two cannot
# disagree (the sort previously started at 1 while the domain started at 0).
score_levels = list(range(0, 21))

hist_x = alt.X(
    "Score:O",
    title="MIST Score",
    sort=score_levels,
    axis=alt.Axis(labelAngle=0),
    scale=alt.Scale(domain=score_levels),
)
hist_y = alt.Y("count()", title="Count")
# Bars are coloured by level using the scale shared with the bar chart.
hist_color = alt.Color(
    selected_var,
    scale=color_scale,
    legend=alt.Legend(
        title=selected_var,
        orient="top-left",
        titleLimit=1000,
        labelLimit=500,
        columns=1,
        padding=10,
    ),
)

hist_title = {
    "text": "Distribution of MIST Scores",
    "subtitle": subtitle_text,
    "subtitleFontSize": 16,
    "anchor": "start",
    "limit": 1500,
}

histogram = (
    alt.Chart(filtered_df)
    .mark_bar()
    .encode(x=hist_x, y=hist_y, color=hist_color)
    .properties(width=800, height=600, title=hist_title)
)
st.altair_chart(histogram)