"""Gradio demo: embed a single-cell .h5ad file with UCE and plot a UMAP."""

import argparse
import os
import subprocess
import sys
import tempfile
from io import BytesIO

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import scanpy as sc
import umap
from huggingface_hub import hf_hub_download

# Fixed locations used by the app.
APP_DIR = "/home/user/app"
UCE_DIR = os.path.join(APP_DIR, "UCE")
UCE_REPO_URL = "https://github.com/minwoosun/UCE.git"
EVAL_SCRIPT = os.path.join(APP_DIR, "eval_single_anndata.py")
MODEL_LOC = "minwoosun/uce-100m"

# Debug switch: set to False to skip the UMAP rendering step.
RENDER_UMAP = True


def _ensure_uce_checkout():
    """Clone the UCE repo once, cd into it and put it on ``sys.path``."""
    print("Current Working Directory:", os.getcwd())

    # Clone into an explicit target and only when it is missing; cloning
    # relative to the cwd on every request nests a new checkout inside the
    # previous one, because this function changes the cwd below.
    if not os.path.isdir(UCE_DIR):
        subprocess.run(["git", "clone", UCE_REPO_URL, UCE_DIR], check=True)

    os.chdir(UCE_DIR)
    print("Current Working Directory:", os.getcwd())

    # Avoid growing sys.path by one duplicate entry per request.
    if UCE_DIR not in sys.path:
        sys.path.append(UCE_DIR)


def _uce_output_path(adata_path):
    """Return the path where the embedded AnnData for *adata_path* is expected.

    NOTE(review): presumably the eval script writes
    ``{input stem}_uce_adata.h5ad`` into the ``--dir`` folder (this matches
    the example ``10k_pbmcs_proc.h5ad`` -> ``10k_pbmcs_proc_uce_adata.h5ad``);
    confirm against eval_single_anndata.py.
    """
    stem = os.path.basename(adata_path)
    if stem.endswith(".h5ad"):
        stem = stem[: -len(".h5ad")]
    return os.path.join(UCE_DIR, f"{stem}_uce_adata.h5ad")


def _run_uce(adata_path):
    """Run the UCE evaluation script on *adata_path* in a subprocess.

    Raises:
        RuntimeError: if the script exits with a non-zero status.
    """
    # Equivalent CLI call:
    # python eval_single_anndata.py --adata_path "./data/10k_pbmcs_proc.h5ad"
    #     --dir "./" --model_loc "minwoosun/uce-100m"
    command = [
        sys.executable,  # same interpreter/environment as the app itself
        EVAL_SCRIPT,
        "--adata_path", adata_path,
        "--dir", UCE_DIR + "/",  # trailing slash kept from the original call
        "--model_loc", MODEL_LOC,
    ]
    try:
        result = subprocess.run(
            command, capture_output=True, text=True, check=True
        )
    except subprocess.CalledProcessError as e:
        print(f"Error executing command: {e}")
        print(e.stdout)
        print(e.stderr)
        # Stop here: continuing would read a missing or stale output file.
        raise RuntimeError(
            f"UCE evaluation failed with exit code {e.returncode}:\n{e.stderr}"
        ) from e
    print(result.stdout)
    print(result.stderr)


def _render_umap(adata):
    """Project ``adata.obsm["X_uce"]`` to 2-D with UMAP and return a PNG array.

    Points are coloured by ``adata.obs["cell_type"]`` when that column
    exists; otherwise a single-colour scatter is drawn without a legend.
    """
    reducer = umap.UMAP(
        n_neighbors=15, min_dist=0.1, n_components=2, random_state=42
    )
    embedding = reducer.fit_transform(adata.obsm["X_uce"])

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        if "cell_type" in adata.obs:
            labels = pd.Categorical(adata.obs["cell_type"])
            scatter = ax.scatter(
                embedding[:, 0], embedding[:, 1],
                c=labels.codes, cmap="Set1", s=50, alpha=0.6,
            )
            # Ask the scatter itself for each code's colour so the legend
            # uses exactly the same normalisation as the plotted points.
            handles = [
                plt.Line2D(
                    [0], [0], marker="o", color="w", label=cell_type,
                    markerfacecolor=scatter.to_rgba(code), markersize=10,
                )
                for code, cell_type in enumerate(labels.categories)
            ]
            ax.legend(handles=handles, title="Cell Type")
        else:
            ax.scatter(embedding[:, 0], embedding[:, 1], s=50, alpha=0.6)

        ax.set_title("UMAP projection of the data")
        ax.set_xlabel("UMAP1")
        ax.set_ylabel("UMAP2")

        # Round-trip through an in-memory PNG to get an image array.
        buf = BytesIO()
        fig.savefig(buf, format="png")
        buf.seek(0)
        return plt.imread(buf, format="png")
    finally:
        # Release the figure; pyplot otherwise keeps every figure alive.
        plt.close(fig)


def main(input_file_path, species):
    """Embed an uploaded .h5ad file with UCE and plot a UMAP of the result.

    Args:
        input_file_path: Path to the uploaded .h5ad file, or a file-like
            upload object exposing the path as ``.name``.
        species: Value of the species dropdown. Currently unused.

    Returns:
        A tuple ``(img, output_file)``: the UMAP plot as an image array
        (``None`` when rendering is disabled) and the path of the .h5ad file
        containing the embeddings.

    Raises:
        ValueError: if no input file was provided.
        RuntimeError: if the UCE evaluation script fails.
        FileNotFoundError: if the expected output file was not produced.
    """
    adata_path = getattr(input_file_path, "name", input_file_path)
    if not adata_path:
        raise ValueError("No input file was provided.")
    # Resolve before the working directory changes below.
    adata_path = os.path.abspath(os.fspath(adata_path))

    _ensure_uce_checkout()

    ##############
    #    UCE     #
    ##############
    # NOTE(review): these names are not used in this function; presumably
    # kept as an early check that the UCE checkout is importable - confirm
    # and drop if not needed.
    from evaluate import AnndataProcessor  # noqa: F401
    from accelerate import Accelerator  # noqa: F401

    _run_uce(adata_path)

    output_file = _uce_output_path(adata_path)
    if not os.path.isfile(output_file):
        raise FileNotFoundError(
            f"Expected UCE output file was not created: {output_file}"
        )

    ##############
    #    UMAP    #
    ##############
    if RENDER_UMAP:
        img = _render_umap(sc.read_h5ad(output_file))
    else:
        img = None
        print("no image")

    return img, output_file


if __name__ == "__main__":
    # Define Gradio inputs and outputs
    file_input = gr.File(label="Upload a .h5ad single cell gene expression file")
    species_input = gr.Dropdown(choices=["human", "mouse"], label="Select species")
    image_output = gr.Image(type="numpy", label="UMAP of UCE Embeddings")
    file_output = gr.File(label="Download embeddings")

    # Create the Gradio interface
    demo = gr.Interface(
        fn=main,
        inputs=[file_input, species_input],
        outputs=[image_output, file_output],
        title="UCE 100M Demo",
        description=(
            "Upload a .h5ad single cell gene expression file, and get a UMAP "
            "scatter plot along with the UCE embeddings in a .h5ad file."
        ),
    )
    demo.launch()