"""Streamlit app for exploring medical licensing exam questions.

Loads a JSONL dataset, filters it by keyword, shows a selected question and
answer in an HTML page that can read them aloud with the browser's speech
synthesis API, and draws exploratory Plotly and Seaborn charts.
"""

import html
import json

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns
import streamlit as st
import streamlit.components.v1 as components

# Maps the label shown in the file dropdown to the JSONL file on disk.
# NOTE(review): the original paired the "small" label with the 16.2MB file and
# the "large" label with the 2.08MB file; the pairing below assumes that was
# an accidental swap -- confirm against the files actually deployed.
DATA_FILES = {
    "small_file.jsonl": "usmle_2.08MB.jsonl",
    "large_file.jsonl": "usmle_16.2MB.jsonl",
}


def load_jsonl(file_path):
    """Load a JSON Lines file into a DataFrame.

    Args:
        file_path: Path of a file containing one JSON document per line.

    Returns:
        A DataFrame with one row per non-blank line of the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Explicit UTF-8 keeps decoding independent of the platform default, and
    # blank lines (e.g. a trailing newline) are skipped instead of crashing
    # json.loads.
    with open(file_path, "r", encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)


def filter_by_keyword(df, keyword):
    """Return the rows of *df* in which any cell contains *keyword*.

    Every cell is compared through its string form. The match is a literal,
    case-sensitive substring test, so characters such as "(" or "+" in the
    keyword are matched as-is instead of being parsed as a regular expression.

    Args:
        df: DataFrame to search.
        keyword: Substring to look for.

    Returns:
        A new DataFrame holding only the matching rows.
    """
    if df.empty:
        return df.copy()
    # Column-wise string matching is vectorised; the row-wise apply it
    # replaces built a Series per row.
    hits = df.astype(str).apply(
        lambda column: column.str.contains(keyword, regex=False)
    )
    return df[hits.any(axis=1)]


def generate_html(question_text, answer_text):
    """Build an HTML5 page that shows the given text and can read it aloud.

    The question and answer are placed in a textarea; a button passes the
    textarea content to ``window.speechSynthesis`` in the browser.

    NOTE(review): the markup that originally surrounded the two placeholders
    was not present in the source under review; this page is a minimal
    reconstruction -- confirm it matches the intended layout.

    Args:
        question_text: Question to embed (converted with ``str``).
        answer_text: Answer to embed (converted with ``str``).

    Returns:
        The HTML document as a string.
    """
    # Escape the dataset text so it cannot close the textarea or inject markup.
    question = html.escape(str(question_text))
    answer = html.escape(str(answer_text))
    # Literal braces of the JavaScript body are doubled for the f-string.
    return f'''<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Read It Aloud</title>
<script>
function readAloud() {{
    const text = document.getElementById("textArea").value;
    window.speechSynthesis.speak(new SpeechSynthesisUtterance(text));
}}
</script>
</head>
<body>
<h1>🔊 Read It Aloud</h1>
<textarea id="textArea" rows="10" cols="80">
{question}
{answer}
</textarea>
<br>
<button onclick="readAloud()">🔊 Read Aloud</button>
</body>
</html>
'''


def _get_dataset(file_option):
    """Return the DataFrame for a dropdown label, loading it once per session."""
    if "datasets" not in st.session_state:
        st.session_state["datasets"] = {}
    datasets = st.session_state["datasets"]
    if file_option not in datasets:
        datasets[file_option] = load_jsonl(DATA_FILES[file_option])
    return datasets[file_option]


def _render_plotly_charts(data):
    """Draw the five Plotly charts from the first two columns of *data*."""
    x_col, y_col = data.columns[0], data.columns[1]
    st.subheader("Plotly Charts 📈")

    # 1. Scatter Plot
    st.plotly_chart(px.scatter(data, x=x_col, y=y_col))
    # 2. Line Plot
    st.plotly_chart(px.line(data, x=x_col, y=y_col))
    # 3. Bar Plot
    st.plotly_chart(px.bar(data, x=x_col, y=y_col))
    # 4. Histogram
    st.plotly_chart(px.histogram(data, x=x_col))
    # 5. Box Plot
    st.plotly_chart(px.box(data, x=x_col, y=y_col))


def _render_seaborn_charts(data):
    """Draw the five Seaborn charts, then release their matplotlib figures."""
    x_col, y_col = data.columns[0], data.columns[1]
    # Pair plot and heatmap are restricted to numeric columns: DataFrame.corr()
    # raises on text columns in pandas 2.x, and pairplot raises when it finds
    # no numeric variables.
    numeric = data.select_dtypes(include="number")
    st.subheader("Seaborn Charts 📊")

    # 6. Violin Plot
    fig, ax = plt.subplots()
    sns.violinplot(x=x_col, y=y_col, data=data, ax=ax)
    st.pyplot(fig)

    # 7. Swarm Plot
    fig, ax = plt.subplots()
    sns.swarmplot(x=x_col, y=y_col, data=data, ax=ax)
    st.pyplot(fig)

    if numeric.shape[1] == 0:
        st.info("No numeric columns available for the pair plot and heatmap.")
    else:
        # 8. Pair Plot
        grid = sns.pairplot(numeric)
        st.pyplot(grid)

        # 9. Heatmap
        fig, ax = plt.subplots()
        sns.heatmap(numeric.corr(), annot=True, ax=ax)
        st.pyplot(fig)

    # 10. Regplot (Regression Plot)
    fig, ax = plt.subplots()
    sns.regplot(x=x_col, y=y_col, data=data, ax=ax)
    st.pyplot(fig)

    # pyplot keeps every figure alive until it is closed; without this each
    # click on "Generate Charts" would leak the figures created above.
    plt.close("all")


# Streamlit App
st.title("Medical Licensing Exam Explorer with Speech Synthesis, Plotly and Seaborn 📊")

# Dropdown for file selection (created once: two identical widgets without
# distinct keys collide in Streamlit).
file_option = st.selectbox("Select file:", list(DATA_FILES))
st.write(f"You selected: {file_option}")

# Load only the selected dataset
data = _get_dataset(file_option)

# Text input for search keyword
search_keyword = st.text_input("Enter a keyword to filter data (e.g., Heart, Lung, Pain, Memory):")

# Button to trigger search. The result is kept in session state because a
# button is only True during the rerun triggered by its own click; anything
# nested under it disappears as soon as another widget is used.
if st.button("Search"):
    st.session_state["search"] = {
        "file": file_option,
        "keyword": search_keyword,
        "rows": filter_by_keyword(data, search_keyword),
    }

search_state = st.session_state.get("search")
# Results from a previously selected file are ignored once the file changes.
if search_state is not None and search_state["file"] == file_option:
    filtered_data = search_state["rows"]
    shown_keyword = search_state["keyword"]
    st.write(f"Filtered Dataset by '{shown_keyword}'")
    st.dataframe(filtered_data)

    # Explicit row picker: the displayed grid does not report a selection here.
    selected_row_index = None
    if not filtered_data.empty:
        selected_row_index = st.selectbox(
            "Select a row to read aloud:", list(filtered_data.index)
        )

    # Button to read selected row aloud
    if st.button("Read Selected Row"):
        if selected_row_index is not None:
            selected_row = filtered_data.loc[selected_row_index]
            question_text = selected_row.get("question", "No question field")
            answer_text = selected_row.get("answer", "No answer field")
            documentHTML5 = generate_html(question_text, answer_text)
            components.html(documentHTML5, width=1280, height=1024)
        else:
            st.warning("Please select a row first.")

# Plotly and Seaborn charts for EDA
if st.button("Generate Charts"):
    if data.shape[1] < 2:
        # Every chart below plots the first column against the second.
        st.warning("At least two columns are required to generate charts.")
    else:
        _render_plotly_charts(data)
        _render_seaborn_charts(data)