|
import streamlit as st |
|
import json |
|
import pandas as pd |
|
import streamlit.components.v1 as components |
|
|
|
|
|
def load_jsonl(file_path):
    """Load a JSON Lines file into a DataFrame, one row per JSON document.

    Args:
        file_path: Path to a text file holding one JSON document per line.

    Returns:
        A pandas DataFrame built from the parsed records, in file order.
        An empty file yields an empty DataFrame.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which may reject or garble non-ASCII text.
    with open(file_path, 'r', encoding='utf-8') as f:
        # Blank lines (for example a trailing newline at the end of the file)
        # carry no record and would make json.loads raise, so skip them.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)
|
|
|
|
|
def filter_by_keyword(df, keyword):
    """Return the rows of ``df`` in which any cell contains ``keyword``.

    Every cell is compared through its string form, and the keyword is
    matched as a literal, case-sensitive substring. It is deliberately not
    interpreted as a regular expression, so user-typed characters such as
    ``(``, ``+`` or ``?`` neither raise nor change the meaning of the search.

    Args:
        df: The DataFrame to search.
        keyword: The text to look for; it is converted with ``str()``.

    Returns:
        A DataFrame holding the matching rows, with the original index and
        columns. An empty input yields an empty copy.
    """
    if df.empty:
        # Nothing to scan; hand back an independent empty frame.
        return df.copy()

    needle = str(keyword)
    mask = pd.Series(False, index=df.index)
    # Scan column by column so each check is one vectorised string operation
    # rather than a Python-level call per row.
    for _, column in df.items():
        hits = column.astype(str).str.contains(needle, regex=False, na=False)
        mask = mask | hits
    return df[mask]
|
|
|
|
|
def generate_html_with_textarea(text_to_speak):
    """Build an HTML document with a textarea and a "read aloud" button.

    The page shows ``text_to_speak`` in an editable textarea. Clicking the
    button passes the textarea's current value to the browser's
    ``SpeechSynthesisUtterance`` / ``speechSynthesis.speak`` API.

    The text is HTML-escaped before insertion, so content such as
    ``</textarea>`` or ``<script>`` is displayed (and spoken) literally
    instead of being parsed as markup. The button finds its textarea relative
    to itself rather than by element id, so several generated blocks can be
    concatenated into one page and each button still reads its own text.

    NOTE(review): the leading 'π' in the heading and button label looks like
    a mis-encoded emoji; it is left unchanged here. Confirm the intended
    glyph.

    Args:
        text_to_speak: The text to show and speak; converted with ``str()``.

    Returns:
        The HTML document as a string.
    """
    from html import escape

    safe_text = escape(str(text_to_speak))
    return f'''
<!DOCTYPE html>
<html>
<head>
    <title>Read It Aloud</title>
    <script type="text/javascript">
        function readAloud(button) {{
            // Use the textarea next to the clicked button so the right text
            // is spoken even when several blocks share one page.
            const textArea = button.parentElement.querySelector("textarea");
            const speech = new SpeechSynthesisUtterance(textArea.value);
            window.speechSynthesis.speak(speech);
        }}
    </script>
</head>
<body>
    <div>
        <h1>π Read It Aloud</h1>
        <textarea id="textArea" rows="10" cols="80">{safe_text}</textarea>
        <br>
        <button onclick="readAloud(this)">π Read Aloud</button>
    </div>
</body>
</html>
'''
|
|
|
|
|
st.title("USMLE Medical Questions Explorer with Speech Synthesis π")

# Let the user pick which JSONL dataset to explore; the option labels are the
# file names themselves.
file_option = st.selectbox("Select file:", ["usmle_16.2MB.jsonl", "usmle_2.08MB.jsonl"])
st.write(f"You selected: {file_option}")

# Load exactly the dataset that was selected. Previously both files were
# parsed on every rerun, and the conditional chose the small dataset in both
# branches, so the large file could never actually be explored.
data = load_jsonl(file_option)
|
|
|
|
|
# Quick-search vocabulary; each term is rendered as its own button.
top_20_terms = ['Heart', 'Lung', 'Pain', 'Memory', 'Kidney', 'Diabetes', 'Cancer', 'Infection', 'Virus', 'Bacteria', 'Neurology', 'Psychiatry', 'Gastrointestinal', 'Pediatrics', 'Oncology', 'Skin', 'Blood', 'Surgery', 'Epidemiology', 'Genetics']

# Streamlit reruns the whole script on every interaction, and a button only
# returns True during the rerun caused by its own click. The latest filter
# result is therefore stored in st.session_state so that "Read All Rows" can
# still reach it on the following rerun (a plain variable would be undefined
# there and raise NameError).
with st.expander("Search by Common Terms π"):
    cols = st.columns(4)
    # Spread the term buttons round-robin across the four columns.
    for position, term in enumerate(top_20_terms):
        with cols[position % 4]:
            if st.button(f"{term}"):
                filtered_data = filter_by_keyword(data, term)
                st.session_state["filtered_data"] = filtered_data
                st.write(f"Filtered Dataset by '{term}' π")
                st.dataframe(filtered_data)

# Free-text search over the same dataset.
search_keyword = st.text_input("Or, enter a keyword to filter data:")
if st.button("Search π΅οΈββοΈ"):
    filtered_data = filter_by_keyword(data, search_keyword)
    st.session_state["filtered_data"] = filtered_data
    st.write(f"Filtered Dataset by '{search_keyword}' π")
    st.dataframe(filtered_data)

# Render one read-aloud block per row of the most recent filter result.
if st.button("Read All Rows π"):
    filtered_data = st.session_state.get("filtered_data")
    if filtered_data is not None and not filtered_data.empty:
        html_blocks = [
            generate_html_with_textarea(row.get("question", "No question field"))
            for _, row in filtered_data.iterrows()
        ]
        all_html = ''.join(html_blocks)
        # Several blocks quickly exceed the fixed height, so allow scrolling
        # inside the embedded frame.
        components.html(all_html, width=1280, height=1024, scrolling=True)
    else:
        st.warning("No rows to read. π¨")
|
|