Spaces:

MISTest
/

stats

Running

stats / app.py

Yara Kyrychenko

upd scale

426184d 22 days ago

3.83 kB

	import streamlit as st
	import pandas as pd
	import altair as alt

	# Browser-tab title and icon for the app.
	st.set_page_config(
	    page_title="MIST Stats",
	    page_icon="🤔",
	)

	# Columns read further down in this script: "Country", "Score", and each of
	# the variables listed in categorical_vars.
	df = pd.read_csv('mist_stats.csv')

	st.header("Misinformation Susceptibility Test Statistics")
	st.subheader("Explore misinformation susceptibility profiles across 24 countries!")

	# The link target carries an explicit https:// scheme; a bare host name in a
	# Markdown link is resolved as a path relative to the current page.
	st.markdown("Data from [yourmist.streamlit.app](https://yourmist.streamlit.app) between June 19, 2023, and July 10, 2024, from individuals who completed the MIST and chose to share their score and all sociodemographic data with the researchers. Only countries with more than 250 complete submissions at the time of data collection are included.")

	# "All countries" is the sentinel for "no country filter". Missing country
	# values are dropped first so sorted() never compares NaN with strings.
	countries = ["All countries"] + sorted(df["Country"].dropna().unique())

	selected_country = st.selectbox("Select a country (or All countries):", countries)

	# Sociodemographic variables the user can break the scores down by.
	categorical_vars = [
	    'Generation',
	    'Education',
	    'Political Leaning',
	    'Gender',
	    'Perceived Misinfo Discernment Ability',
	]
	selected_var = st.selectbox("Select a variable to visualize:", categorical_vars)

	# Display order of the levels of each categorical variable. The lists are
	# used as the category order for pandas and as the domain/sort order for
	# the Altair charts.
	ordering_dict = {
	    "Gender": [
	        "Male",
	        "Female",
	        "Non-binary/Third",
	    ],
	    "Generation": [
	        "Generation Z",
	        "Millennials",
	        "Generation X",
	        "Baby Boomers",
	    ],
	    "Education": [
	        "High School or Less",
	        "Some University but no degree",
	        "University Bachelors Degree",
	        "Graduate or professional degree (e.g., MA, PhD, MD)",
	    ],
	    "Political Leaning": [
	        "Extremely liberal",
	        "Liberal",
	        "Slightly liberal",
	        "Moderate",
	        "Slightly conservative",
	        "Conservative",
	        "Extremely conservative",
	    ],
	    "Perceived Misinfo Discernment Ability": [
	        "Very poor",
	        "Poor",
	        "Average",
	        "Good",
	        "Very good",
	    ],
	}

	# Turn the selected column into an ordered categorical so that grouping and
	# sorting follow the order given in ordering_dict. Values that are not in
	# the category list become NaN.
	df[selected_var] = pd.Categorical(
	    df[selected_var],
	    categories=ordering_dict[selected_var],
	    ordered=True,
	)

	# The per-country subset is copied so that later column assignments on
	# filtered_df act on an independent frame, not on a slice of df.
	filtered_df = (
	    df
	    if selected_country == "All countries"
	    else df[df["Country"] == selected_country].copy()
	)

	# Headline numbers shown in the chart subtitles.
	num_obs = filtered_df.shape[0]
	mean_score = filtered_df['Score'].mean().round(2)
	std_dev = filtered_df['Score'].std().round(2)

	# Mean score per level of the selected variable. observed=False is spelled
	# out to keep every declared level in the result (the behaviour this script
	# has always had) and to avoid the pandas FutureWarning about the changing
	# default for categorical groupers.
	plot_df = (
	    filtered_df.groupby(selected_var, observed=False)
	    .agg(avg_score=('Score', 'mean'))
	    .reset_index()
	)
	plot_df['avg_score'] = plot_df['avg_score'].round(2)
	plot_df = plot_df.sort_values(by=selected_var)

	# Two subtitle lines shared by both charts.
	subtitle_text = [
	    f'In {selected_country} by {selected_var.capitalize()}',
	    f'N={num_obs}, Mean={mean_score}, SD={std_dev}',
	]

	# Colour mapping over the levels of the selected variable; it is reused by
	# the histogram further down, so the name must stay as is.
	color_scale = alt.Scale(scheme='viridis', domain=ordering_dict[selected_var])

	# Title block: main title plus the two subtitle lines.
	bar_title = {
	    "text": 'Average MIST Score',
	    "subtitle": subtitle_text,
	    "subtitleFontSize": 16,
	    "anchor": "start",
	    "limit": 1000,
	}

	# Horizontal bars: mean score on x (fixed 0-20 axis), one bar per level on y.
	bar_x = alt.X(
	    'avg_score',
	    title='Average MIST Score',
	    scale=alt.Scale(domain=[0, 20]),
	)
	bar_y = alt.Y(
	    selected_var,
	    title=selected_var,
	    sort=ordering_dict[selected_var],
	)
	# The y axis already labels each bar, so the colour legend is suppressed.
	bar_color = alt.Color(selected_var, scale=color_scale, legend=None)

	chart = (
	    alt.Chart(plot_df)
	    .mark_bar()
	    .encode(x=bar_x, y=bar_y, color=bar_color)
	    .properties(width=800, height=600, title=bar_title)
	)

	st.altair_chart(chart)

	# Apply the ordered categorical through assign(), which returns a new frame.
	# Writing the column into filtered_df directly can trigger pandas'
	# SettingWithCopyWarning when filtered_df is a slice of df.
	filtered_df = filtered_df.assign(
	    **{
	        selected_var: pd.Categorical(
	            filtered_df[selected_var],
	            categories=ordering_dict[selected_var],
	            ordered=True,
	        )
	    }
	).sort_values(by=selected_var)

	# Every possible score, used for both the axis order and the scale domain.
	# The sort list used to start at 1 while the domain started at 0.
	score_levels = list(range(0, 21))

	# Stacked histogram: respondent count per score, coloured by the selected
	# variable with the same colour scale as the bar chart.
	histogram = alt.Chart(filtered_df).mark_bar().encode(
	    x=alt.X(
	        'Score:O',
	        title='MIST Score',
	        sort=score_levels,
	        axis=alt.Axis(labelAngle=0),
	        scale=alt.Scale(domain=score_levels),
	    ),
	    y=alt.Y('count()', title='Count'),
	    color=alt.Color(
	        selected_var,
	        scale=color_scale,
	        legend=alt.Legend(
	            title=selected_var,
	            orient="top-left",
	            titleLimit=1000,
	            labelLimit=500,
	            columns=1,
	            padding=10,
	        ),
	    ),
	).properties(
	    width=800,
	    height=600,
	    title={
	        "text": 'Distribution of MIST Scores',
	        "subtitle": subtitle_text,
	        "subtitleFontSize": 16,
	        "anchor": "start",
	        "limit": 1500,
	    },
	)

	st.altair_chart(histogram)