import gradio as gr import pandas as pd import numpy as np import umap import json import matplotlib.pyplot as plt import os import scanpy as sc import subprocess import sys from io import BytesIO from sklearn.linear_model import LogisticRegression from huggingface_hub import hf_hub_download def load_and_predict_with_classifier(x, model_path, output_path, save): # Load the model parameters from the JSON file with open(model_path, 'r') as f: model_params = json.load(f) # Reconstruct the logistic regression model model_loaded = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000) model_loaded.coef_ = np.array(model_params["coef"]) model_loaded.intercept_ = np.array(model_params["intercept"]) model_loaded.classes_ = np.array(model_params["classes"]) # output predictions y_pred = model_loaded.predict(x) # Convert the array to a Pandas DataFrame if save: df = pd.DataFrame(y_pred, columns=["predicted_cell_type"]) df.to_csv(output_path, index=False, header=False) return y_pred def plot_umap(adata): labels = pd.Categorical(adata.obs["cell_type"]) reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, n_components=2, random_state=42) embedding = reducer.fit_transform(adata.obsm["X_uce"]) plt.figure(figsize=(10, 8)) # Create the scatter plot scatter = plt.scatter(embedding[:, 0], embedding[:, 1], c=labels.codes, cmap='Set1', s=50, alpha=0.6) # Create a legend handles = [] for i, cell_type in enumerate(labels.categories): handles.append(plt.Line2D([0], [0], marker='o', color='w', label=cell_type, markerfacecolor=plt.cm.Set1(i / len(labels.categories)), markersize=10)) plt.legend(handles=handles, title='Cell Type') plt.title('UMAP projection of the data') plt.xlabel('UMAP1') plt.ylabel('UMAP2') # Save plot to a BytesIO object buf = BytesIO() plt.savefig(buf, format='png') buf.seek(0) # Read the image from BytesIO object img = plt.imread(buf, format='png') return img def toggle_file_input(default_dataset): if default_dataset != "None": return gr.update(interactive=False) # Disable the file input if a default dataset is selected else: return gr.update(interactive=True) # Enable the file input if no default dataset is selected def download_files(base_path): # clone and cd into UCE repo os.system('git clone https://github.com/minwoosun/UCE.git') os.chdir(BASE_PATH) # Add the directory to the Python path sys.path.append(BASE_PATH) # Set default dataset path default_dataset_1_path = hf_hub_download(repo_id="minwoosun/uce-misc", filename="100_pbmcs_proc_subset.h5ad") default_dataset_2_path = hf_hub_download(repo_id="minwoosun/uce-misc", filename="1k_pbmcs_proc_subset.h5ad") return default_dataset_1_path, default_dataset_2_path def main(input_file_path, species, default_dataset, default_dataset_1_path, default_dataset_2_path): BASE_PATH = '/home/user/app/UCE/' # # clone and cd into UCE repo # os.system('git clone https://github.com/minwoosun/UCE.git') # os.chdir(BASE_PATH) # # Add the directory to the Python path # sys.path.append(BASE_PATH) # # Set default dataset path # default_dataset_1_path = hf_hub_download(repo_id="minwoosun/uce-misc", filename="100_pbmcs_proc_subset.h5ad") # default_dataset_2_path = hf_hub_download(repo_id="minwoosun/uce-misc", filename="1k_pbmcs_proc_subset.h5ad") # If the user selects a default dataset, use that instead of the uploaded file if default_dataset == "PBMC 100 cells": input_file_path = default_dataset_1_path elif default_dataset == "PBMC 1000 cells": input_file_path = default_dataset_2_path ############## # UCE # ############## from evaluate import AnndataProcessor from accelerate import Accelerator model_loc = 'minwoosun/uce-100m' # Construct the command command = [ 'python', BASE_PATH + 'eval_single_anndata.py', '--adata_path', input_file_path, '--dir', BASE_PATH, '--model_loc', model_loc ] # Print the command for debugging print("Running command:", command) print("---> RUNNING UCE") result = subprocess.run(command, capture_output=True, text=True, check=True) print(result.stdout) print(result.stderr) print("---> FINSIH UCE") ################################ # Cell-type classification # ################################ # Set output file path file_name_with_ext = os.path.basename(input_file_path) file_name = os.path.splitext(file_name_with_ext)[0] pred_file = BASE_PATH + f"{file_name}_predictions.csv" model_path = hf_hub_download(repo_id="minwoosun/uce-misc", filename="tabula_sapiens_v1_logistic_regression_model_weights.json") file_name_with_ext = os.path.basename(input_file_path) file_name = os.path.splitext(file_name_with_ext)[0] output_file = BASE_PATH + f"{file_name}_uce_adata.h5ad" adata = sc.read_h5ad(output_file) x = adata.obsm['X_uce'] y_pred = load_and_predict_with_classifier(x, model_path, pred_file, save=True) ############## # UMAP # ############## img = plot_umap(adata) return img, output_file, pred_file if __name__ == "__main__": BASE_PATH = '/home/user/app/UCE/' default_dataset_1_path, default_dataset_2_path = download_files(BASE_PATH) with gr.Blocks() as demo: gr.Markdown( '''