|
import streamlit as st |
|
import json |
|
import pandas as pd |
|
import plotly.express as px |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
import streamlit.components.v1 as components |
|
|
|
|
|
# Default search result: an empty frame, used until a search has produced rows.
filtered_data = pd.DataFrame()

# Index label of the row chosen for read-aloud; None means no row is selected.
selected_row_index = None
|
|
|
|
|
def load_jsonl(file_path):
    """Load a JSON Lines file into a DataFrame, one row per JSON record.

    Args:
        file_path: Path of a UTF-8 text file holding one JSON value per line.

    Returns:
        A ``pandas.DataFrame`` built from the parsed records, in file order.
        A file with no records yields an empty DataFrame.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        # Blank lines (typically a trailing newline) carry no record and
        # would otherwise make json.loads raise.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)
|
|
|
|
|
def filter_by_keyword(df, keyword, case=True):
    """Return the rows of *df* in which at least one cell contains *keyword*.

    Every cell is converted to ``str`` before matching, so non-string
    columns are searched as well. The keyword is matched as a plain
    substring, not as a regular expression, so characters such as ``(``,
    ``.`` or ``+`` typed by a user are taken literally.

    Args:
        df: DataFrame to search.
        keyword: Substring to look for. An empty string matches every row.
        case: When True (the default) matching is case-sensitive.

    Returns:
        A DataFrame holding the matching rows, with their original index
        labels and column order.
    """
    if df.empty:
        # Nothing to search; avoids applying string operations to a frame
        # that has no rows or no columns.
        return df.copy()

    # Build the mask column by column (vectorised) rather than row by row.
    as_text = df.astype(str)
    hits = as_text.apply(
        lambda col: col.str.contains(keyword, case=case, regex=False)
    )
    return df[hits.any(axis=1)]
|
|
|
|
|
|
|
def generate_html(question_text, answer_text):
    """Build a standalone HTML page that can read a question and answer aloud.

    The page shows the two texts in paragraphs with ids ``questionArea`` and
    ``answerArea``. Each has a button that passes the paragraph's text to
    the browser's ``speechSynthesis`` API.

    Args:
        question_text: Text of the question; converted with ``str()``.
        answer_text: Text of the answer; converted with ``str()``.

    Returns:
        The HTML document as a string.
    """
    # Local import keeps this function self-contained.
    from html import escape

    # The texts come from dataset rows, so escape them: a literal "<" or "&"
    # (e.g. "pH < 7.3") must not be parsed as markup.
    safe_question = escape(str(question_text))
    safe_answer = escape(str(answer_text))

    # NOTE(review): the "π" glyphs below look like mis-decoded emoji. They
    # are left untouched because the intended characters cannot be
    # recovered from this file.
    return f'''
    <!DOCTYPE html>
    <html>
    <head>
        <title>Read It Aloud</title>
        <script type="text/javascript">
            function readAloud(id) {{
                const text = document.getElementById(id).innerText;
                const speech = new SpeechSynthesisUtterance(text);
                window.speechSynthesis.speak(speech);
            }}
        </script>
    </head>
    <body>
        <h1>π Read It Aloud</h1>
        <p id="questionArea">{safe_question}</p>
        <button onclick="readAloud('questionArea')">π Read Question Aloud</button>
        <p id="answerArea">{safe_answer}</p>
        <button onclick="readAloud('answerArea')">π Read Answer Aloud</button>
    </body>
    </html>
    '''
|
|
|
|
|
st.title("Medical Licensing Exam Explorer with Speech Synthesis, Plotly and Seaborn π")

# Menu entry -> file on disk. The 2.08MB file is the small dataset and the
# 16.2MB file the large one (the two names were previously swapped).
dataset_files = {
    "small_file.jsonl": "usmle_2.08MB.jsonl",
    "large_file.jsonl": "usmle_16.2MB.jsonl",
}

# Render the picker exactly once: two identical widgets without distinct
# keys make Streamlit raise a duplicate-widget error.
file_option = st.selectbox("Select file:", list(dataset_files))
st.write(f"You selected: {file_option}")

# The whole script runs again on every interaction, so parse only the file
# that is actually selected instead of both.
data = load_jsonl(dataset_files[file_option])
|
|
|
|
|
|
|
search_keyword = st.text_input("Enter a keyword to filter data (e.g., Heart, Lung, Pain, Memory):")

# A button is True only during the rerun triggered by its own click, so the
# search result is kept in session state; otherwise it would be gone by the
# time "Read Selected Row" is pressed.
if st.button("Search"):
    st.session_state["search_result"] = {
        "file": file_option,
        "keyword": search_keyword,
        "rows": filter_by_keyword(data, search_keyword),
    }

search_result = st.session_state.get("search_result")
if search_result is not None and search_result["file"] != file_option:
    # The stored rows belong to the other dataset; ignore them.
    search_result = None

# Per-run defaults: no rows and no selection until a search result exists.
selected_row_index = None
filtered_data = pd.DataFrame()

if search_result is not None:
    filtered_data = search_result["rows"]
    shown_keyword = search_result["keyword"]
    st.write(f"Filtered Dataset by '{shown_keyword}'")
    st.dataframe(filtered_data)
    if not filtered_data.empty:
        # Offer the index labels shown in the table so the user can pick
        # the row to be read aloud.
        selected_row_index = st.selectbox(
            "Select a row to read aloud:", filtered_data.index.tolist()
        )

if st.button("Read Selected Row"):
    if selected_row_index is None:
        st.warning("Please select a row first.")
    else:
        selected_row = filtered_data.loc[selected_row_index]
        question_text = selected_row.get("question", "No question field")
        answer_text = selected_row.get("answer", "No answer field")

        documentHTML5 = generate_html(question_text, answer_text)
        components.html(documentHTML5, width=1280, height=1024)
|
|
|
|
|
def _render_on_new_axes(draw):
    """Call ``draw(ax)`` on a fresh Matplotlib figure, show it, then free it."""
    fig, ax = plt.subplots()
    try:
        draw(ax)
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each click on the button would leak another set of figures.
        plt.close(fig)


if st.button("Generate Charts"):
    if data.shape[1] < 2:
        # Every x/y chart below reads the first two columns.
        st.warning("At least two columns are needed to generate the charts.")
    else:
        x_col, y_col = data.columns[0], data.columns[1]

        st.subheader("Plotly Charts π")

        for make_chart in (px.scatter, px.line, px.bar):
            st.plotly_chart(make_chart(data, x=x_col, y=y_col))
        st.plotly_chart(px.histogram(data, x=x_col))
        st.plotly_chart(px.box(data, x=x_col, y=y_col))

        st.subheader("Seaborn Charts π")

        # Pass the axes explicitly instead of relying on pyplot's implicit
        # "current axes".
        _render_on_new_axes(
            lambda ax: sns.violinplot(x=x_col, y=y_col, data=data, ax=ax)
        )
        _render_on_new_axes(
            lambda ax: sns.swarmplot(x=x_col, y=y_col, data=data, ax=ax)
        )

        # The pair plot and the correlation heatmap only make sense for
        # numeric columns. DataFrame.corr() raises on text columns in
        # recent pandas versions, so correlate the numeric subset only.
        numeric_data = data.select_dtypes(include="number")
        if numeric_data.shape[1] == 0:
            st.info("No numeric columns available for the pair plot and heatmap.")
        else:
            grid = sns.pairplot(data)
            try:
                st.pyplot(grid.figure)
            finally:
                plt.close(grid.figure)

            _render_on_new_axes(
                lambda ax: sns.heatmap(numeric_data.corr(), annot=True, ax=ax)
            )

        _render_on_new_axes(
            lambda ax: sns.regplot(x=x_col, y=y_col, data=data, ax=ax)
        )