cmcmaster's picture
Update main.py
3f57bf8 verified
import gradio as gr
from datasets import load_dataset
import datetime, math
from calendar import month_name
import os
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
import atexit
# Hugging Face access token read from the environment (None when unset).
# NOTE(review): not referenced anywhere else in the visible code - confirm it is still needed.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Dataset identifier that load_data() passes to load_dataset().
DATASET_NAME = "cmcmaster/rheumatology-biologics-dataset-monthly"
# Interval between automatic update checks, in days (used as IntervalTrigger(days=...)).
UPDATE_INTERVAL = 1
def month_str_to_int(month: str) -> int:
    """Convert a full month name, in any letter case, to its month number.

    The name is title-cased and parsed with ``strptime``'s ``%B`` directive,
    so a string that is not a full month name raises ``ValueError``.
    """
    parsed = datetime.datetime.strptime(month.title(), "%B")
    return parsed.month
def month_int_to_str(month_int: int) -> str:
    """Return the entry of ``calendar.month_name`` at index *month_int*."""
    name = month_name[month_int]
    return name
def _is_nan(x):
return isinstance(x, float) and math.isnan(x)
def _fmt_int(x):
return "None" if x is None or _is_nan(x) else str(int(x))
def _fmt_streamlined(x):
# return integer string if present, else "None"
if x is None or _is_nan(x): return "None"
try:
return str(int(float(x)))
except Exception:
return "None"
def load_data():
    """Load the dataset and keep only the rows of its most recent schedule.

    The latest schedule is the highest ``schedule_year`` and, within that
    year, the latest ``schedule_month``.

    Returns:
        A dict with the latest-schedule rows under 'combinations', a sorted
        list of distinct values per dropdown ('drugs', 'brands',
        'formulations', 'indications', 'treatment_phases', 'hospital_types'),
        the schedule label under 'latest_schedule' and the load timestamp
        under 'last_refreshed'. If anything fails, the error is printed and a
        fallback dict is returned instead: empty lists, 'latest_schedule' set
        to 'Unknown' and 'last_refreshed' set to 'Failed to load'.
    """
    # Result key -> dataset column supplying that dropdown's options.
    dropdown_columns = {
        'drugs': 'drug',
        'brands': 'brand',
        'formulations': 'formulation',
        'indications': 'indication',
        'treatment_phases': 'treatment_phase',
        'hospital_types': 'hospital_type',
    }
    try:
        dataset = load_dataset(DATASET_NAME, split="train")

        # Latest schedule: max year first, then max month within that year.
        newest_year = max(dataset['schedule_year'])
        rows_in_newest_year = dataset.filter(lambda row: row['schedule_year'] == newest_year)
        newest_month = max(map(month_str_to_int, rows_in_newest_year['schedule_month']))
        newest_month_label = month_int_to_str(newest_month)

        def in_latest_schedule(row):
            return (
                row['schedule_year'] == newest_year
                and month_str_to_int(row['schedule_month']) == newest_month
            )

        # Everything downstream sees ONLY rows from the latest schedule.
        latest_rows = dataset.filter(in_latest_schedule)

        payload = {'combinations': latest_rows}
        for key, column in dropdown_columns.items():
            payload[key] = sorted(set(latest_rows[column]))
        payload['latest_schedule'] = f"{newest_month_label} {newest_year}"
        payload['last_refreshed'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return payload
    except Exception as e:
        print(f"An error occurred while loading data: {str(e)}")
        fallback = {'combinations': []}
        for key in dropdown_columns:
            fallback[key] = []
        fallback['latest_schedule'] = 'Unknown'
        fallback['last_refreshed'] = 'Failed to load'
        return fallback
# Module-level cache of the latest-schedule data, loaded once at import time.
# refresh_data() and check_for_updates() rebind it via `global`.
biologics_data = load_data()
def refresh_data():
    """Reload the dataset and swap it in when it carries a different schedule.

    The module-level ``biologics_data`` is rebound only when the reload
    succeeded and its schedule label differs from the current one.

    Returns:
        A status string for display: success, "Already using latest
        schedule", or a failure message.
    """
    global biologics_data
    try:
        print(f"Refreshing data at {datetime.datetime.now()}")
        current_schedule = biologics_data.get('latest_schedule', '')
        temp_data = load_data()

        # load_data() does not raise on failure; it returns a fallback dict
        # marked 'Failed to load'. Its 'Unknown' schedule would otherwise look
        # like a new schedule and replace good data with empty data.
        if temp_data.get('last_refreshed') == 'Failed to load':
            return "❌ Failed to refresh data: the dataset could not be loaded (see server log)"

        new_schedule = temp_data.get('latest_schedule', '')
        if new_schedule == current_schedule:
            return "Already using latest schedule"

        biologics_data = temp_data
        return f"✅ Data refreshed successfully at {biologics_data['last_refreshed']}. Latest schedule: {biologics_data['latest_schedule']}"
    except Exception as e:
        return f"❌ Failed to refresh data: {str(e)}"
def check_for_updates():
    """Scheduled job: reload the dataset and adopt it if the schedule changed.

    Progress and errors are printed. The module-level ``biologics_data`` is
    rebound only when the reload succeeded and reports a different schedule.
    """
    global biologics_data
    try:
        current_schedule = biologics_data.get('latest_schedule', '')
        temp_data = load_data()

        # load_data() signals failure through its fallback dict rather than an
        # exception. Adopting that dict would discard the working data.
        if temp_data.get('last_refreshed') == 'Failed to load':
            print(f"Update check could not load data. Keeping current schedule: {current_schedule}")
            return

        new_schedule = temp_data.get('latest_schedule', '')
        if new_schedule != current_schedule:
            print(f"New schedule detected: {new_schedule} (was: {current_schedule})")
            biologics_data = temp_data
            print(f"Data automatically updated to {new_schedule}")
        else:
            print(f"No new data found. Current schedule: {current_schedule}")
    except Exception as e:
        print(f"Error checking for updates: {str(e)}")
def search_biologics(drug, brand, formulation, indication, treatment_phase, hospital_type, state):
    """Render the rows matching the selected filters as Markdown.

    Args:
        drug, brand, formulation, indication, treatment_phase, hospital_type:
            Selected dropdown values. A falsy value (e.g. "") means the
            column is not filtered.
        state: Data dict shaped like the return value of ``load_data()``.
            Reads 'combinations' and 'latest_schedule'.

    Returns:
        One Markdown section per matching row, or "No results found." when
        nothing matches.
    """
    selected = {
        'drug': drug,
        'brand': brand,
        'formulation': formulation,
        'indication': indication,
        'treatment_phase': treatment_phase,
        'hospital_type': hospital_type,
    }

    def matches(row):
        return all(not wanted or row[column] == wanted for column, wanted in selected.items())

    combinations = state['combinations']
    if hasattr(combinations, 'filter'):
        results = combinations.filter(matches)
    else:
        # After a failed load 'combinations' is a plain list, which has no
        # .filter(); filter it directly instead of raising AttributeError.
        results = [row for row in combinations if matches(row)]

    if len(results) == 0:
        return "No results found."

    # Everything is latest schedule already; render clean ints and streamlined code
    sections = []
    for item in results:
        lines = [
            "",
            f"### {item['drug']} ({item['brand']})",
            f"* **PBS Code:** [{item['pbs_code']}](https://www.pbs.gov.au/medicine/item/{item['pbs_code']})",
            f"* **Formulation:** {item['formulation']}",
            f"* **Indication:** {item['indication']}",
            f"* **Treatment Phase:** {item['treatment_phase']}",
            f"* **Maximum Prescribable Pack:** {_fmt_int(item['maximum_prescribable_pack'])}",
            f"* **Maximum Quantity Units:** {_fmt_int(item['maximum_quantity_units'])}",
            f"* **Number of Repeats:** {_fmt_int(item['number_of_repeats'])}",
            f"* **Streamlined Code:** {_fmt_streamlined(item.get('streamlined_code'))}",
            f"* **Authority Method:** {item['authority_method'].replace('_', ' ').title()}",
            f"* **Online Application:** {'Yes' if item['online_application'] else 'No'}",
            f"* **Hospital Type:** {item['hospital_type']}",
            f"* **Schedule:** {state['latest_schedule']} (current)",
            "---",
            "",
        ]
        # Joining yields a leading and a trailing newline around each section.
        sections.append("\n".join(lines))
    return "".join(sections)
def create_interface():
    """Build the Gradio Blocks UI for searching the latest-schedule data.

    The layout is a header showing schedule and refresh time, a refresh
    button, six linked filter dropdowns, Search and Reset buttons, and a
    Markdown results area. Changing any dropdown narrows the choices of all
    six to values that still have matching rows.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here.
    """
    # Dataset columns behind the six dropdowns, in dropdown order.
    filter_columns = ('drug', 'brand', 'formulation', 'indication', 'treatment_phase', 'hospital_type')

    def schedule_banner():
        # Reads the module-level data at call time so it reflects refreshes.
        return f"**Current Schedule:** {biologics_data['latest_schedule']} | **Last Refreshed:** {biologics_data['last_refreshed']}"

    def narrow(combinations, selections):
        # Keep rows that match every non-empty selection.
        wanted = dict(zip(filter_columns, selections))

        def matches(row):
            return all(not value or row[column] == value for column, value in wanted.items())

        if hasattr(combinations, 'filter'):
            return combinations.filter(matches)
        # After a failed load 'combinations' is a plain list with no .filter().
        return [row for row in combinations if matches(row)]

    def column_values(rows, column):
        # A plain list of row dicts cannot be indexed by column name.
        if isinstance(rows, list):
            return [row[column] for row in rows]
        return rows[column]

    with gr.Blocks(title="Biologics Prescriber Helper") as demo:
        gr.Markdown("# Biologics Prescriber Helper")

        with gr.Row():
            with gr.Column():
                data_info = gr.Markdown(schedule_banner())
            with gr.Column():
                refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary", size="sm")
                refresh_status = gr.Markdown("")

        session_data = gr.State(biologics_data)

        def refresh_and_update():
            # refresh_data() may rebind the global, so read it only afterwards.
            status = refresh_data()
            return (
                status,
                schedule_banner(),
                gr.Dropdown(choices=[""] + biologics_data['drugs'], value=""),
                gr.Dropdown(choices=[""] + biologics_data['brands'], value=""),
                gr.Dropdown(choices=[""] + biologics_data['formulations'], value=""),
                gr.Dropdown(choices=[""] + biologics_data['indications'], value=""),
                gr.Dropdown(choices=[""] + biologics_data['treatment_phases'], value=""),
                gr.Dropdown(choices=[""] + biologics_data['hospital_types'], value=""),
                biologics_data,
            )

        def update_dropdown_choices(drug, brand, formulation, indication, treatment_phase, hospital_type, state):
            selections = (drug, brand, formulation, indication, treatment_phase, hospital_type)
            filtered = narrow(state['combinations'], selections)
            updates = []
            for column, selected in zip(filter_columns, selections):
                choices = [""] + sorted(set(column_values(filtered, column)))
                # Keep the current selection only while it is still offered.
                updates.append(gr.Dropdown(choices=choices, value=selected if selected in choices else ""))
            return (*updates, state)

        with gr.Row():
            with gr.Column():
                drug = gr.Dropdown(choices=[""] + biologics_data['drugs'], label="Drug", value="", interactive=True)
                brand = gr.Dropdown(choices=[""] + biologics_data['brands'], label="Brand", value="", interactive=True)
                formulation = gr.Dropdown(choices=[""] + biologics_data['formulations'], label="Formulation", value="", interactive=True)
            with gr.Column():
                indication = gr.Dropdown(choices=[""] + biologics_data['indications'], label="Indication", value="", interactive=True)
                treatment_phase = gr.Dropdown(choices=[""] + biologics_data['treatment_phases'], label="Treatment Phase", value="", interactive=True)
                hospital_type = gr.Dropdown(choices=[""] + biologics_data['hospital_types'], label="Hospital Type", value="", interactive=True)

        with gr.Row():
            search_btn = gr.Button("Search", variant="primary")
            clear_btn = gr.Button("Reset")

        results = gr.Markdown()

        def reset_inputs(state):
            # Clearing every selection restores the full choice lists.
            return (*update_dropdown_choices("", "", "", "", "", "", state)[:-1], state)

        dropdowns = [drug, brand, formulation, indication, treatment_phase, hospital_type]
        for dd in dropdowns:
            dd.change(fn=update_dropdown_choices,
                      inputs=dropdowns + [session_data],
                      outputs=dropdowns + [session_data])

        search_btn.click(fn=search_biologics,
                         inputs=dropdowns + [session_data],
                         outputs=results)
        clear_btn.click(fn=reset_inputs, inputs=[session_data],
                        outputs=dropdowns + [session_data])
        refresh_btn.click(fn=refresh_and_update, inputs=[],
                          outputs=[refresh_status, data_info] + dropdowns + [session_data])

    return demo
# Background job that runs check_for_updates every UPDATE_INTERVAL days.
# This block executes at import time, not only when the file is run as a script.
scheduler = BackgroundScheduler()
scheduler.add_job(
    func=check_for_updates,
    trigger=IntervalTrigger(days=UPDATE_INTERVAL),
    id='check_updates',
    name='Check for Data Updates',
    replace_existing=True,
)
scheduler.start()

# Shut the scheduler down when the interpreter exits.
atexit.register(scheduler.shutdown)

if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
# TODO
# Add information about how to handle drugs with two different initial codes (Cosentyx, Cimzia - use balance of supply)