import gradio as gr
import pandas as pd
import torch
import torch.nn.functional as F
import tempfile
from sentence_transformers import SentenceTransformer
from safetensors import safe_open
from transformers import pipeline, AutoTokenizer

# Load the trial "spaces" table. The matching code below reads its
# `this_space`, `nct_id`, `title`, `brief_summary` and `eligibility_criteria`
# columns.
trial_spaces = pd.read_csv('ctgov_all_trials_trial_space_lineitems_10-31-24.csv')

# Load the sentence-embedding model used to embed the patient summary.
# NOTE(review): trust_remote_code=True presumably lets the library execute
# code shipped with the model repository -- confirm that repo is trusted.
embedding_model = SentenceTransformer('ksg-dfci/TrialSpace', trust_remote_code=True)

# Load precomputed trial space embeddings.
# The matching code indexes `trial_spaces` by position using indices derived
# from this tensor, so row i here is assumed to describe row i of the CSV
# above -- TODO confirm the two files were generated together.
with safe_open("trial_space_embeddings.safetensors", framework="pt") as f:
    trial_space_embeddings = f.get_tensor("space_embeddings")

# Load the checker pipeline: a text-classification model that is run on each
# (trial space, patient summary) pair; the matching code keeps only pairs
# labelled 'POSITIVE'. It is configured with truncation and max-length
# padding at 512 tokens.
tokenizer = AutoTokenizer.from_pretrained("roberta-large")
checker_pipe = pipeline(
    'text-classification', 
    'ksg-dfci/TrialChecker', 
    tokenizer=tokenizer, 
    truncation=True, 
    padding='max_length', 
    max_length=512
)

# Columns of the DataFrame handed back to the UI, in display/export order.
_RESULT_COLUMNS = [
    'patient_summary_query',
    'nct_id',
    'trial_title',
    'trial_brief_summary',
    'trial_eligibility_criteria',
    'this_space',
    'trial_checker_result',
    'trial_checker_score',
]


def _parse_max_results(raw, default: int = 10, lower: int = 1, upper: int = 50) -> int:
    """Convert the user-supplied result limit to an int clamped to [lower, upper]."""
    try:
        requested = int(raw)
    except (TypeError, ValueError, OverflowError):
        # None, blank, non-numeric or non-finite input falls back to the default.
        requested = default
    return max(lower, min(upper, requested))


def match_clinical_trials_dropdown(patient_summary: str, max_results_str: str):
    """
    Find candidate trials for a patient summary and keep those the checker accepts.

    Steps:
      1) Embed the patient summary and rank all trial spaces by cosine similarity.
      2) Take the top `max_results` candidates and run the checker pipeline on
         each (trial space, patient summary) pair.
      3) Keep only candidates labelled 'POSITIVE'.

    Args:
        patient_summary: Free-text patient summary entered by the user.
        max_results_str: Maximum number of candidates to retrieve, as text.
            Invalid input falls back to 10; the value is clamped to 1-50.

    Returns:
        A tuple `(dropdown, out_df)`:
          * `dropdown` is a `gr.Dropdown` whose choices look like
            "1. NCT001 - Some Title" (the first one preselected), or an empty
            dropdown when nothing matched.
          * `out_df` is a DataFrame with the `_RESULT_COLUMNS` columns, one row
            per dropdown choice, in the same order.
    """
    max_results = _parse_max_results(max_results_str)

    # A blank query cannot produce meaningful matches; skip the slow model
    # calls and hand back an empty result with the usual columns.
    if not patient_summary or not patient_summary.strip():
        return (
            gr.Dropdown(choices=[], interactive=True, value=None),
            pd.DataFrame(columns=_RESULT_COLUMNS),
        )

    # 1. Encode user input. Move it next to the precomputed embeddings so the
    #    similarity also works when the encoder runs on a different device.
    patient_embedding = embedding_model.encode([patient_summary], convert_to_tensor=True)
    patient_embedding = patient_embedding.to(trial_space_embeddings.device)

    # 2. Compute similarities (one score per trial space row)
    similarities = F.cosine_similarity(patient_embedding, trial_space_embeddings)

    # 3. Pull top 'max_results' without sorting every score; k is capped at
    #    the number of available embeddings because topk rejects larger k.
    k = min(max_results, similarities.numel())
    top_indices = torch.topk(similarities, k=k).indices.cpu().numpy()

    # 4. Build DataFrame from a single positional selection of the candidates
    candidates = trial_spaces.iloc[top_indices]
    analysis = pd.DataFrame({
        'patient_summary_query': patient_summary,
        'nct_id': candidates['nct_id'],
        'trial_title': candidates['title'],
        'trial_brief_summary': candidates['brief_summary'],
        'trial_eligibility_criteria': candidates['eligibility_criteria'],
        'this_space': candidates['this_space'],
    }).reset_index(drop=True)

    # 5. Prepare for checker pipeline: trial space text followed by the
    #    patient summary, joined by the fixed separator sentence.
    analysis['pt_trial_pair'] = (
        analysis['this_space']
        + "\nNow here is the patient summary:"
        + analysis['patient_summary_query']
    )

    # 6. Run checker pipeline
    classifier_results = checker_pipe(analysis['pt_trial_pair'].tolist())
    analysis['trial_checker_result'] = [x['label'] for x in classifier_results]
    analysis['trial_checker_score'] = [x['score'] for x in classifier_results]

    # 7. Restrict to POSITIVE results only, and 8. keep the final columns.
    #    The index is reset so row position matches the dropdown numbering.
    positive = analysis[analysis['trial_checker_result'] == 'POSITIVE']
    out_df = positive[_RESULT_COLUMNS].reset_index(drop=True)

    # Build the dropdown choices, e.g., "1. NCT001 - Some Title"
    dropdown_options = [
        f"{rank}. {nct_id} - {title}"
        for rank, (nct_id, title) in enumerate(
            zip(out_df['nct_id'], out_df['trial_title']), start=1
        )
    ]

    # Preselect the first match; with no matches the dropdown stays empty.
    default_choice = dropdown_options[0] if dropdown_options else None
    return (
        gr.Dropdown(choices=dropdown_options, interactive=True, value=default_choice),
        out_df,
    )

def show_selected_trial(selected_option: str, df: pd.DataFrame):
    """
    Build the detail text for the trial chosen in the dropdown.

    Args:
        selected_option: Dropdown choice such as "1. NCT001 - Some Title";
            the number before the first "." is the 1-based row position in
            `df`. Empty or None means nothing is selected.
        df: Results DataFrame holding one row per dropdown choice. May be
            None when no search has been run yet.

    Returns:
        "" when nothing is selected, a "No data found" message when the
        option cannot be mapped to a row of `df`, otherwise a multi-line
        summary of that row.
    """
    if not selected_option:
        return ""

    not_found = "No data found for the selected trial."

    # The results state is None until the first search completes.
    if df is None:
        return not_found

    # Parse the index from "1. NCT001 - Some Title"
    rank_text, _, _ = selected_option.partition(".")
    try:
        chosen_index = int(rank_text.strip()) - 1
    except ValueError:
        return not_found

    if not 0 <= chosen_index < len(df):
        return not_found

    record = df.iloc[chosen_index].to_dict()
    return (
        f"Patient Summary Query: {record['patient_summary_query']}\n\n"
        f"NCT ID: {record['nct_id']}\n"
        f"Trial Title: {record['trial_title']}\n\n"
        f"Trial Space: {record['this_space']}\n\n"
        f"Trial Checker Result: {record['trial_checker_result']}\n"
        f"Trial Checker Score: {record['trial_checker_score']}\n\n"
        f"Brief Summary: {record['trial_brief_summary']}\n\n"
        f"Full Eligibility Criteria: {record['trial_eligibility_criteria']}\n\n"
    )

def export_results(df: pd.DataFrame):
    """
    Saves the DataFrame to a temporary CSV file so Gradio can provide a downloadable link.

    Args:
        df: Results DataFrame to export. May be None when no search has been
            run yet.

    Returns:
        Path of the written ".csv" file, or None when there is nothing to
        export. The file is created with delete=False, so this function
        never removes it.
    """
    # Export clicked before any search: nothing to write.
    if df is None:
        return None

    # Only reserve a unique path here; closing the handle right away avoids
    # leaking an open file descriptor on every export.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp:
        csv_path = temp.name

    df.to_csv(csv_path, index=False)
    return csv_path

# Custom CSS handed to gr.Blocks(css=...) below. It fixes the size of the
# <textarea> inside the element with id "input_box"; only the patient summary
# textbox sets elem_id="input_box", so the max-results textbox is unaffected.
# NOTE(review): `!important` is presumably needed to win over Gradio's own
# styling -- confirm if the sizes stop applying after a Gradio upgrade.
custom_css = """
#input_box textarea {
    width: 600px !important;
    height: 250px !important;
}
"""

# Build the UI. Components are created top to bottom inside the Blocks
# context, and the three event handlers at the end wire them to the functions
# defined above. Keep the creation order: the handlers refer to the component
# variables, and (per Gradio Blocks behavior) the order also drives layout.
with gr.Blocks(css=custom_css) as demo:
    # Intro text
    # NOTE(review): the final <p> (item 5) has no closing </p> tag.
    gr.HTML("""
    <h3>Demonstration version of clinical trial search based on MatchMiner-AI</h3>
    <p>Based on clinicaltrials.gov cancer trials export 10/31/24.</p>
    <p>Queries take approximately 30 seconds to run per ten results returned, 
       since demo is running on a small CPU instance.</p>
    <p>Disclaimers:</p>
    <p>1. Not a clinical decision support tool. Queries are not saved, but do not input protected health information.</p>
    <p>2. AI-extracted trial "spaces" and candidate matches may contain errors</p>
    <p>3. Will not necessarily return all trials from clinicaltrials.gov that match a given query</p>
    <p>4. Under active development; interface and underlying models will change</p>
    <p>5. For better results, spell out cancer types (eg, enter "acute myeloid leukemia" rather than "AML")
    """)

    # Textbox for patient summary (pre-filled with an example query; sized by
    # the "#input_box" rule in custom_css)
    patient_summary_input = gr.Textbox(
        label="Enter Patient Summary",
        elem_id="input_box",
        value="Cancer type: Non-small cell lung cancer.  Histology: Adenocarcinoma.  Extent of disease: Metastatic.  Prior treatment: Pembrolizumab.  Biomarkers: PD-L1 high, KRAS G12C mutant."
    )

    # Textbox for max results. The value arrives as text and is parsed and
    # clamped to 1-50 inside match_clinical_trials_dropdown.
    max_results_input = gr.Textbox(
        label="Enter the maximum number of results to return (1-50)",
        value="10"  # default
    )

    # Button to run the matching
    submit_btn = gr.Button("Find Matches")

    # We'll store the DataFrame in a State for CSV export + reference.
    # It is written by the "Find Matches" handler and read by the dropdown
    # and export handlers; no initial value is given here.
    results_state = gr.State()

    # Dropdown (initially empty); its choices are replaced by the
    # "Find Matches" handler.
    trial_dropdown = gr.Dropdown(
        label="Select a Trial",
        choices=[],
        value=None,
        interactive=True
    )

    # Textbox for showing details of the selected trial (read-only)
    trial_details_box = gr.Textbox(
        label="Selected Trial Details",
        lines=12,
        interactive=False
    )

    # Export button + file component that receives the CSV path
    export_btn = gr.Button("Export Results")
    download_file = gr.File()

    # 1) "Find Matches" => updates the dropdown choices and the state
    submit_btn.click(
        fn=match_clinical_trials_dropdown,
        inputs=[patient_summary_input, max_results_input],
        outputs=[trial_dropdown, results_state]
    )

    # 2) Selecting from the dropdown => shows more info
    trial_dropdown.change(
        fn=show_selected_trial,
        inputs=[trial_dropdown, results_state],
        outputs=trial_details_box
    )

    # 3) Export => CSV
    export_btn.click(
        fn=export_results,
        inputs=results_state,
        outputs=download_file
    )

    # Enable queue so "Processing..." is shown if logic is slow
    demo.queue()

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()