Spaces:
Running
Running
import gradio as gr | |
from datasets import load_dataset | |
import datetime, math | |
from calendar import month_name | |
import os | |
from apscheduler.schedulers.background import BackgroundScheduler | |
from apscheduler.triggers.interval import IntervalTrigger | |
import atexit | |
# Hugging Face access token taken from the environment (None when unset).
# NOTE(review): nothing in the visible source passes this to the loader.
HF_TOKEN = os.getenv("HF_TOKEN")
# Hub repository id of the dataset read by load_data().
DATASET_NAME = "cmcmaster/rheumatology-biologics-dataset-monthly"
# Number of days between background checks for a newer schedule.
UPDATE_INTERVAL = 1
def month_str_to_int(month: str) -> int:
    """Convert a full month name, in any letter case, to its number (1-12).

    Raises:
        ValueError: if the title-cased name does not parse with ``%B``.
    """
    normalised = month.title()
    parsed = datetime.datetime.strptime(normalised, "%B")
    return parsed.month
def month_int_to_str(month_int: int) -> str:
    """Return the full month name for a month number (1 is January)."""
    name = month_name[month_int]
    return name
def _is_nan(x): | |
return isinstance(x, float) and math.isnan(x) | |
def _fmt_int(x): | |
return "None" if x is None or _is_nan(x) else str(int(x)) | |
def _fmt_streamlined(x):
    """Render a streamlined code as an integer string, or "None" if absent.

    The value is passed through ``float`` first, so numeric text such as
    "1234.0" is accepted. Anything that cannot be converted yields "None".
    """
    missing = "None"
    if x is None or _is_nan(x):
        return missing
    try:
        code = int(float(x))
    except Exception:
        return missing
    return str(code)
def load_data(): | |
try: | |
ds = load_dataset(DATASET_NAME, split="train") | |
# Determine latest schedule (max year, then max month within that year) | |
latest_year = max(ds['schedule_year']) | |
latest_year_rows = ds.filter(lambda r: r['schedule_year'] == latest_year) | |
latest_month_int = max(month_str_to_int(m) for m in latest_year_rows['schedule_month']) | |
latest_month_name = month_int_to_str(latest_month_int) | |
# Keep ONLY rows from the latest schedule | |
ds_latest = ds.filter( | |
lambda r: r['schedule_year'] == latest_year and month_str_to_int(r['schedule_month']) == latest_month_int | |
) | |
# Dropdown options derived from the latest schedule only | |
drugs = sorted(set(ds_latest['drug'])) | |
brands = sorted(set(ds_latest['brand'])) | |
formulations = sorted(set(ds_latest['formulation'])) | |
indications = sorted(set(ds_latest['indication'])) | |
treatment_phases = sorted(set(ds_latest['treatment_phase'])) | |
hospital_types = sorted(set(ds_latest['hospital_type'])) | |
last_updated = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
return { | |
'combinations': ds_latest, # <- only latest schedule rows | |
'drugs': drugs, | |
'brands': brands, | |
'formulations': formulations, | |
'indications': indications, | |
'treatment_phases': treatment_phases, | |
'hospital_types': hospital_types, | |
'latest_schedule': f"{latest_month_name} {latest_year}", | |
'last_refreshed': last_updated | |
} | |
except Exception as e: | |
print(f"An error occurred while loading data: {str(e)}") | |
return { | |
'combinations': [], | |
'drugs': [], | |
'brands': [], | |
'formulations': [], | |
'indications': [], | |
'treatment_phases': [], | |
'hospital_types': [], | |
'latest_schedule': 'Unknown', | |
'last_refreshed': 'Failed to load' | |
} | |
# Module-level copy of the latest-schedule data, loaded once at import time.
# refresh_data() and check_for_updates() rebind it when a new schedule appears.
biologics_data = load_data()
def refresh_data():
    """Reload the dataset on demand and adopt it if the schedule changed.

    The module-level ``biologics_data`` is replaced only when the reload
    succeeded AND reports a different schedule. A failed reload never
    overwrites data that is already loaded.

    Returns:
        A human-readable status message for display in the UI.
    """
    global biologics_data
    try:
        print(f"Refreshing data at {datetime.datetime.now()}")
        current_schedule = biologics_data.get('latest_schedule', '')
        temp_data = load_data()
        new_schedule = temp_data.get('latest_schedule', '')

        if new_schedule == 'Unknown':
            # load_data() reports failure by returning an empty fallback whose
            # schedule is 'Unknown'. Adopting it would discard the good data
            # currently held and falsely report success.
            return "❌ Failed to refresh data: the dataset could not be loaded"

        if new_schedule != current_schedule:
            biologics_data = temp_data
            return f"✅ Data refreshed successfully at {biologics_data['last_refreshed']}. Latest schedule: {biologics_data['latest_schedule']}"

        return "Already using latest schedule"
    except Exception as e:
        return f"❌ Failed to refresh data: {e}"
def check_for_updates():
    """Background job: reload the dataset and adopt it if the schedule changed.

    The module-level ``biologics_data`` is replaced only when the reload
    succeeded AND reports a different schedule. Outcomes are printed; nothing
    is returned and no exception escapes.
    """
    global biologics_data
    try:
        current_schedule = biologics_data.get('latest_schedule', '')
        temp_data = load_data()
        new_schedule = temp_data.get('latest_schedule', '')

        if new_schedule == 'Unknown':
            # load_data() reports failure by returning an empty fallback whose
            # schedule is 'Unknown'. Keep serving the data already held
            # rather than replacing it with nothing.
            print(f"Update check could not load data. Keeping current schedule: {current_schedule}")
        elif new_schedule != current_schedule:
            print(f"New schedule detected: {new_schedule} (was: {current_schedule})")
            biologics_data = temp_data
            print(f"Data automatically updated to {new_schedule}")
        else:
            print(f"No new data found. Current schedule: {current_schedule}")
    except Exception as e:
        print(f"Error checking for updates: {e}")
def search_biologics(drug, brand, formulation, indication, treatment_phase, hospital_type, state):
    """Return a Markdown listing of the rows that match the selected filters.

    Args:
        drug, brand, formulation, indication, treatment_phase, hospital_type:
            Selected dropdown values. An empty or None value places no
            constraint on that field.
        state: Session data dict. ``state['combinations']`` is iterated as
            rows (mappings keyed by column name); ``state['latest_schedule']``
            is shown on each result.

    Returns:
        One Markdown section per matching row, concatenated, or the string
        "No results found." when nothing matches.
    """
    selected = {
        'drug': drug,
        'brand': brand,
        'formulation': formulation,
        'indication': indication,
        'treatment_phase': treatment_phase,
        'hospital_type': hospital_type,
    }
    active = {field: value for field, value in selected.items() if value}

    # Rows are filtered by plain iteration so this works both for a loaded
    # dataset and for the empty list that load_data() returns on failure.
    matches = [
        row for row in state['combinations']
        if all(row[field] == value for field, value in active.items())
    ]
    if not matches:
        return "No results found."

    # Everything is latest schedule already; render clean ints and streamlined code.
    sections = []
    for item in matches:
        # Tolerate a missing authority method rather than failing on .replace().
        authority = str(item['authority_method'] or '').replace('_', ' ').title()
        lines = [
            "",
            f"### {item['drug']} ({item['brand']})",
            f"* **PBS Code:** [{item['pbs_code']}](https://www.pbs.gov.au/medicine/item/{item['pbs_code']})",
            f"* **Formulation:** {item['formulation']}",
            f"* **Indication:** {item['indication']}",
            f"* **Treatment Phase:** {item['treatment_phase']}",
            f"* **Maximum Prescribable Pack:** {_fmt_int(item['maximum_prescribable_pack'])}",
            f"* **Maximum Quantity Units:** {_fmt_int(item['maximum_quantity_units'])}",
            f"* **Number of Repeats:** {_fmt_int(item['number_of_repeats'])}",
            f"* **Streamlined Code:** {_fmt_streamlined(item.get('streamlined_code'))}",
            f"* **Authority Method:** {authority}",
            f"* **Online Application:** {'Yes' if item['online_application'] else 'No'}",
            f"* **Hospital Type:** {item['hospital_type']}",
            f"* **Schedule:** {state['latest_schedule']} (current)",
            "---",
            "",
        ]
        sections.append("\n".join(lines))
    return "".join(sections)
def create_interface():
    """Build and return the Gradio Blocks UI for searching the biologics data.

    The page has a schedule banner with a manual refresh button, six linked
    dropdown filters whose choices narrow as selections are made, and Search
    and Reset buttons above a Markdown results area.

    The banner, dropdown choices and session state are re-synchronised from
    the module-level ``biologics_data`` on every page load, so data adopted by
    the background update job reaches new visitors.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    # (row field, biologics_data key) pairs, in the order the dropdowns are wired.
    fields = (
        ('drug', 'drugs'),
        ('brand', 'brands'),
        ('formulation', 'formulations'),
        ('indication', 'indications'),
        ('treatment_phase', 'treatment_phases'),
        ('hospital_type', 'hospital_types'),
    )

    def schedule_banner():
        """Return the banner text for the data currently held."""
        return f"**Current Schedule:** {biologics_data['latest_schedule']} | **Last Refreshed:** {biologics_data['last_refreshed']}"

    def unfiltered_dropdowns():
        """Return dropdown updates offering every choice, with nothing selected."""
        return [
            gr.Dropdown(choices=[""] + biologics_data[key], value="")
            for _, key in fields
        ]

    with gr.Blocks(title="Biologics Prescriber Helper") as demo:
        gr.Markdown("# Biologics Prescriber Helper")
        with gr.Row():
            with gr.Column():
                data_info = gr.Markdown(schedule_banner())
            with gr.Column():
                refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary", size="sm")
                refresh_status = gr.Markdown("")
        session_data = gr.State(biologics_data)

        def refresh_and_update():
            """Reload on demand, then reset banner, dropdowns and session state."""
            status = refresh_data()
            return (status, schedule_banner(), *unfiltered_dropdowns(), biologics_data)

        def sync_on_load():
            """Bring a newly opened page in line with the data currently held."""
            return (schedule_banner(), *unfiltered_dropdowns(), biologics_data)

        def update_dropdown_choices(drug, brand, formulation, indication, treatment_phase, hospital_type, state):
            """Narrow every dropdown to the values compatible with the selection."""
            selected = {
                'drug': drug,
                'brand': brand,
                'formulation': formulation,
                'indication': indication,
                'treatment_phase': treatment_phase,
                'hospital_type': hospital_type,
            }
            active = {field: value for field, value in selected.items() if value}
            # Plain iteration also works for the empty list that load_data()
            # returns on failure, which has neither .filter nor column access.
            filtered = [
                row for row in state['combinations']
                if all(row[field] == value for field, value in active.items())
            ]
            updates = []
            for field, value in selected.items():
                options = [""] + sorted({row[field] for row in filtered})
                # Drop a selection that is no longer among the offered options.
                updates.append(
                    gr.Dropdown(choices=options, value=value if value in options else "")
                )
            return (*updates, state)

        with gr.Row():
            with gr.Column():
                drug = gr.Dropdown(choices=[""] + biologics_data['drugs'], label="Drug", value="", interactive=True)
                brand = gr.Dropdown(choices=[""] + biologics_data['brands'], label="Brand", value="", interactive=True)
                formulation = gr.Dropdown(choices=[""] + biologics_data['formulations'], label="Formulation", value="", interactive=True)
            with gr.Column():
                indication = gr.Dropdown(choices=[""] + biologics_data['indications'], label="Indication", value="", interactive=True)
                treatment_phase = gr.Dropdown(choices=[""] + biologics_data['treatment_phases'], label="Treatment Phase", value="", interactive=True)
                hospital_type = gr.Dropdown(choices=[""] + biologics_data['hospital_types'], label="Hospital Type", value="", interactive=True)
        with gr.Row():
            search_btn = gr.Button("Search", variant="primary")
            clear_btn = gr.Button("Reset")
        results = gr.Markdown()

        def reset_inputs(state):
            """Clear every selection and restore the full set of choices."""
            return update_dropdown_choices("", "", "", "", "", "", state)

        dropdowns = [drug, brand, formulation, indication, treatment_phase, hospital_type]

        for dd in dropdowns:
            dd.change(fn=update_dropdown_choices,
                      inputs=[*dropdowns, session_data],
                      outputs=[*dropdowns, session_data])
        search_btn.click(fn=search_biologics,
                         inputs=[*dropdowns, session_data],
                         outputs=results)
        clear_btn.click(fn=reset_inputs, inputs=[session_data],
                        outputs=[*dropdowns, session_data])
        refresh_btn.click(fn=refresh_and_update, inputs=[],
                          outputs=[refresh_status, data_info, *dropdowns, session_data])
        # The components above were built from the data held at build time;
        # refresh them on each page load so background updates become visible.
        demo.load(fn=sync_on_load, inputs=None,
                  outputs=[data_info, *dropdowns, session_data])
    return demo
# Check for a newer schedule in the background, once every UPDATE_INTERVAL days.
scheduler = BackgroundScheduler()
scheduler.add_job(
    func=check_for_updates,
    trigger=IntervalTrigger(days=UPDATE_INTERVAL),
    id='check_updates',
    name='Check for Data Updates',
    replace_existing=True,
)
scheduler.start()

# Shut the scheduler down when the interpreter exits.
atexit.register(lambda: scheduler.shutdown())

if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
# TODO: Add guidance on prescribing drugs that have two different initial codes
# (e.g. Cosentyx, Cimzia - use balance of supply).