# app.py — uploaded by awacke1
# Commit 3fddc37: "Update app.py" (4.4 kB)
# (Page-navigation residue from the web file viewer: "raw", "history blame".)
import streamlit as st
import json
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import streamlit.components.v1 as components
# Index label of the row chosen for read-aloud; None means no row is
# currently selected.
selected_row_index = None
# Module-level default for the search results: an empty DataFrame that is
# used until a search replaces it.
filtered_data = pd.DataFrame()
# Function to load JSONL file into a DataFrame
def load_jsonl(file_path):
    """Load a JSON Lines file into a DataFrame, one row per record.

    Args:
        file_path: Path of the ``.jsonl`` file; each non-blank line must
            hold one JSON document.

    Returns:
        A ``pandas.DataFrame`` built from the parsed records, in file
        order. An empty file yields an empty DataFrame.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    # Decode explicitly as UTF-8 so non-ASCII text does not depend on the
    # platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            # Blank lines (e.g. a trailing newline at end of file) are not
            # records; json.loads("") would raise on them.
            if line:
                records.append(json.loads(line))
    return pd.DataFrame(records)
# Function to filter DataFrame by keyword
def filter_by_keyword(df, keyword):
    """Return the rows of ``df`` in which any cell contains ``keyword``.

    Every cell is compared through its string form, and the keyword is
    matched as a literal, case-sensitive substring. Characters such as
    ``(`` or ``+`` are therefore searched for as typed instead of being
    parsed as a regular expression.

    Args:
        df: DataFrame to search.
        keyword: Text to look for; converted with ``str()`` before matching.

    Returns:
        A DataFrame holding the matching rows with their original index
        labels. An empty keyword matches every row.
    """
    needle = str(keyword)
    # Accumulate the row mask one column at a time: each column is a single
    # vectorized string operation instead of a Python-level call per row.
    mask = pd.Series(False, index=df.index, dtype=bool)
    for _, column in df.items():
        mask |= column.astype(str).str.contains(needle, regex=False, na=False)
    return df[mask]
# Function to generate HTML5 code with embedded text
def generate_html(question_text, answer_text):
    """Build a standalone HTML page that shows and reads aloud a Q/A pair.

    The page contains one paragraph for the question and one for the
    answer, each followed by a button that passes the paragraph's text to
    the browser's ``speechSynthesis`` API.

    Args:
        question_text: Text for the question paragraph; converted with
            ``str()``.
        answer_text: Text for the answer paragraph; converted with ``str()``.

    Returns:
        The HTML document as a string.
    """
    import html  # local import: the module is only needed by this function

    # The texts come from dataset rows, so escape them before interpolation;
    # otherwise "<", "&" or a stray tag would corrupt the page or run as
    # script. The browser decodes the entities again, so the spoken text is
    # unchanged.
    safe_question = html.escape(str(question_text))
    safe_answer = html.escape(str(answer_text))
    return f'''
<!DOCTYPE html>
<html>
<head>
<title>Read It Aloud</title>
<script type="text/javascript">
function readAloud(id) {{
const text = document.getElementById(id).innerText;
const speech = new SpeechSynthesisUtterance(text);
window.speechSynthesis.speak(speech);
}}
</script>
</head>
<body>
<h1>🔊 Read It Aloud</h1>
<p id="questionArea">{safe_question}</p>
<button onclick="readAloud('questionArea')">🔊 Read Question Aloud</button>
<p id="answerArea">{safe_answer}</p>
<button onclick="readAloud('answerArea')">🔊 Read Answer Aloud</button>
</body>
</html>
'''
# Streamlit App
st.title("Medical Licensing Exam Explorer with Speech Synthesis, Plotly and Seaborn 📊")

# Menu label -> JSONL file on disk. The file names carry their size, so the
# "small" entry points at the 2.08MB file and the "large" one at the 16.2MB
# file.
DATA_FILES = {
    "small_file.jsonl": "usmle_2.08MB.jsonl",
    "large_file.jsonl": "usmle_16.2MB.jsonl",
}

# Dropdown for file selection. It is created exactly once: two widgets of the
# same type and label without distinct keys are rejected by Streamlit as
# duplicates.
file_option = st.selectbox("Select file:", list(DATA_FILES))
st.write(f"You selected: {file_option}")

# Load only the selected file; the script reruns on every interaction, so
# parsing both files each time would be wasted work.
try:
    data = load_jsonl(DATA_FILES[file_option])
except FileNotFoundError:
    st.error(f"Data file not found: {DATA_FILES[file_option]}")
    st.stop()
# Text input for search keyword
search_keyword = st.text_input("Enter a keyword to filter data (e.g., Heart, Lung, Pain, Memory):")

# Button to trigger search. st.button is True only during the rerun caused by
# its own click, so the results are stored in session state; otherwise they
# would be gone by the time "Read Selected Row" is clicked.
if st.button("Search"):
    st.session_state["filtered_data"] = filter_by_keyword(data, search_keyword)
    st.session_state["filtered_keyword"] = search_keyword

# Show the most recent search results and let the user pick a row from them.
selected_row_index = None
if "filtered_data" in st.session_state:
    filtered_data = st.session_state["filtered_data"]
    st.write(f"Filtered Dataset by '{st.session_state['filtered_keyword']}'")
    st.dataframe(filtered_data)
    if not filtered_data.empty:
        selected_row_index = st.selectbox(
            "Select a row to read aloud:", filtered_data.index.tolist()
        )

# Button to read selected row aloud
if st.button("Read Selected Row"):
    if selected_row_index is not None:
        selected_row = filtered_data.loc[selected_row_index]
        # Series.get falls back to the placeholder when the column is absent.
        question_text = selected_row.get("question", "No question field")
        answer_text = selected_row.get("answer", "No answer field")
        documentHTML5 = generate_html(question_text, answer_text)
        components.html(documentHTML5, width=1280, height=1024)
    else:
        st.warning("Please select a row first.")
# Plotly and Seaborn charts for EDA
def _render_seaborn_axes_plot(plot_fn, **plot_kwargs):
    """Draw one seaborn axes-level plot on a fresh figure and show it."""
    fig, ax = plt.subplots()
    try:
        # Pass the axes explicitly instead of relying on pyplot's implicit
        # "current axes".
        plot_fn(ax=ax, **plot_kwargs)
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed, and this script
        # reruns on each interaction, so unclosed figures would pile up.
        plt.close(fig)


if st.button("Generate Charts"):
    if len(data.columns) < 2:
        # Every two-variable chart below reads the first two columns.
        st.warning("At least two columns are needed to generate charts.")
    else:
        x_col, y_col = data.columns[0], data.columns[1]
        # Correlation and pair plots are only defined for numeric columns.
        numeric_data = data.select_dtypes(include="number")

        st.subheader("Plotly Charts 📈")
        # 1. Scatter Plot
        st.plotly_chart(px.scatter(data, x=x_col, y=y_col))
        # 2. Line Plot
        st.plotly_chart(px.line(data, x=x_col, y=y_col))
        # 3. Bar Plot
        st.plotly_chart(px.bar(data, x=x_col, y=y_col))
        # 4. Histogram
        st.plotly_chart(px.histogram(data, x=x_col))
        # 5. Box Plot
        st.plotly_chart(px.box(data, x=x_col, y=y_col))

        st.subheader("Seaborn Charts 📊")
        # 6. Violin Plot
        _render_seaborn_axes_plot(sns.violinplot, x=x_col, y=y_col, data=data)
        # 7. Swarm Plot
        _render_seaborn_axes_plot(sns.swarmplot, x=x_col, y=y_col, data=data)

        if numeric_data.shape[1] == 0:
            st.info("No numeric columns available; skipping pair plot and heatmap.")
        else:
            # 8. Pair Plot (figure-level: seaborn creates its own figure)
            pair_grid = sns.pairplot(data)
            try:
                st.pyplot(pair_grid.figure)
            finally:
                plt.close(pair_grid.figure)
            # 9. Heatmap of the numeric columns' correlation matrix
            _render_seaborn_axes_plot(
                sns.heatmap, data=numeric_data.corr(), annot=True
            )

        # 10. Regplot (Regression Plot)
        _render_seaborn_axes_plot(sns.regplot, x=x_col, y=y_col, data=data)