Spaces:
Sleeping
Sleeping
File size: 7,061 Bytes
76f2fb3 02970c0 db541e4 56bb9b9 334043f 02970c0 4d5beeb f73076c 4d5beeb ed36f4f 02970c0 4a7c05c 334043f 4a7c05c 6bc4f0e 7f5c48e 4a7c05c 7f5c48e 4a7c05c 7f5c48e 6bc4f0e 7f5c48e 6bc4f0e 7f5c48e 76f2fb3 4a7c05c 7f5c48e 6bc4f0e ed36f4f 6bc4f0e ed36f4f 4a7c05c ed36f4f 6bc4f0e ed36f4f ac7c6bd 4a7c05c ac7c6bd 02970c0 4d5beeb 6bc4f0e 4d5beeb 76f2fb3 4d5beeb 6bc4f0e 4d5beeb ff4e1a8 6bc4f0e 4d5beeb 76f2fb3 d92a3e6 4d5beeb affd796 ed36f4f 76f2fb3 ff4e1a8 6bc4f0e ed36f4f 76f2fb3 ed36f4f 334043f ed36f4f ac7c6bd affd796 2f14da2 f73076c ac7c6bd f73076c ed36f4f f257a1d 2f14da2 affd796 4f972b8 ac7c6bd 4f972b8 ac7c6bd 4f972b8 4d5beeb db541e4 affd796 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import re, os
from pathlib import Path
import gradio as gr
import spaces
import torch
from evodiff.pretrained import OA_DM_38M, D3PM_UNIFORM_38M, MSA_OA_DM_MAXSUB
from evodiff.generate import generate_oaardm, generate_d3pm
from evodiff.generate_msa import generate_query_oadm_msa_simple
from evodiff.conditional_generation import inpaint_simple, generate_scaffold
# Torch device string shared by every generation function below:
# default to the CPU and switch to CUDA only when torch reports it is available.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
@spaces.GPU()
def make_uncond_seq(seq_len, model_type):
    """Generate a sequence unconditionally with the selected EvoDiff model.

    Args:
        seq_len: Requested sequence length; coerced with ``int()`` before it
            is handed to the generator.
        model_type: ``"EvoDiff-Seq-OADM 38M"`` or
            ``"EvoDiff-D3PM-Uniform 38M"``.

    Returns:
        The second element of the pair returned by ``generate_oaardm`` or
        ``generate_d3pm`` (both are called with ``batch_size=1``).

    Raises:
        ValueError: If ``model_type`` is not one of the two supported names.
            Previously this case fell through both branches and failed with
            an ``UnboundLocalError`` on the return statement.
    """
    if model_type == "EvoDiff-Seq-OADM 38M":
        model, _collater, tokenizer, _scheme = OA_DM_38M()
        _tokenized_sample, generated_sequence = generate_oaardm(
            model, tokenizer, int(seq_len), batch_size=1, device=device
        )
    elif model_type == "EvoDiff-D3PM-Uniform 38M":
        # return_all=True makes the checkpoint unpack into the extra
        # timestep / Q_bar / Q values that generate_d3pm takes as arguments.
        (model, _collater, tokenizer, _scheme,
         timestep, Q_bar, Q) = D3PM_UNIFORM_38M(return_all=True)
        _tokenized_sample, generated_sequence = generate_d3pm(
            model, tokenizer, Q, Q_bar, timestep, int(seq_len),
            batch_size=1, device=device
        )
    else:
        raise ValueError(f"Unsupported model type: {model_type!r}")

    return generated_sequence
def make_cond_seq(seq_len, msa_file, n_sequences, model_type):
    """Generate a sequence conditioned on an uploaded MSA file.

    Args:
        seq_len: Requested sequence length; coerced with ``int()``.
        msa_file: The uploaded MSA. Either an object exposing the on-disk
            path as ``.name`` or a plain path (``str`` / ``os.PathLike``).
        n_sequences: Number of sequences to sample; coerced with ``int()``.
        model_type: Must be ``"EvoDiff-MSA"``.

    Returns:
        The second element of the pair returned by
        ``generate_query_oadm_msa_simple``.

    Raises:
        ValueError: If ``model_type`` is unsupported or no MSA file was given.
            Previously these cases surfaced as ``UnboundLocalError`` /
            ``AttributeError``.

    NOTE(review): unlike ``make_uncond_seq`` this function is not wrapped in
    ``@spaces.GPU()`` -- confirm whether that is intentional.
    """
    # Validate the cheap inputs first so bad requests fail before the
    # checkpoint is loaded.
    if model_type != "EvoDiff-MSA":
        raise ValueError(f"Unsupported model type: {model_type!r}")
    if msa_file is None:
        raise ValueError("An MSA file is required for conditional generation.")

    # Accept both a plain path and a file wrapper that carries its path
    # in ``.name``.
    if isinstance(msa_file, (str, os.PathLike)):
        msa_path = os.fspath(msa_file)
    else:
        msa_path = msa_file.name

    model, _collater, tokenizer, _scheme = MSA_OA_DM_MAXSUB()
    print(f"MSA File Path: {msa_path}")
    _tokenized_sample, generated_sequence = generate_query_oadm_msa_simple(
        msa_path, model, tokenizer, int(n_sequences),
        seq_length=int(seq_len), device=device, selection_type='random'
    )
    return generated_sequence
def make_inpainted_idrs(sequence, start_idx, end_idx, model_type):
    """Inpaint the region ``[start_idx:end_idx]`` of ``sequence``.

    Args:
        sequence: The input sequence string.
        start_idx: Start of the region to regenerate; coerced with ``int()``.
        end_idx: End of the region to regenerate; coerced with ``int()``.
        model_type: Must be ``"EvoDiff-Seq"``.

    Returns:
        A dict with the keys ``original_sequence``, ``generated_sequence``,
        ``original_region`` and ``generated_region``.

    Raises:
        ValueError: If ``model_type`` is unsupported. Previously this case
            failed with an ``UnboundLocalError``.

    NOTE(review): unlike ``make_uncond_seq`` this function is not wrapped in
    ``@spaces.GPU()`` -- confirm whether that is intentional.
    """
    if model_type != "EvoDiff-Seq":
        raise ValueError(f"Unsupported model type: {model_type!r}")

    # Convert once and reuse: the original sliced ``sequence`` with the raw
    # values, which raises TypeError for floats even though the model call
    # received int-converted indices.
    start, end = int(start_idx), int(end_idx)

    model, _collater, tokenizer, _scheme = OA_DM_38M()
    _sample, entire_sequence, generated_idr = inpaint_simple(
        model, sequence, start, end, tokenizer=tokenizer, device=device
    )

    return {
        "original_sequence": sequence,
        "generated_sequence": entire_sequence,
        "original_region": sequence[start:end],
        "generated_region": generated_idr,
    }
# def make_scaffold_motifs(pdb_code, start_idx, end_idx, scaffold_length, model_type):
# if model_type == "EvoDiff-Seq":
# checkpoint = OA_DM_38M()
# model, collater, tokenizer, scheme = checkpoint
# data_top_dir = '/home/user/.cache/huggingface/datasets/'
# os.makedirs(data_top_dir, exist_ok=True)
# # print("Folders in User Cache Directory:", os.listdir("/home/user/.cache"))
# start_idx = list(map(int, start_idx.strip('][').split(',')))
# end_idx = list(map(int, end_idx.strip('][').split(',')))
# generated_sequence, new_start_idx, new_end_idx = generate_scaffold(model, pdb_code, start_idx, end_idx, scaffold_length, data_top_dir, tokenizer, device=device)
# generated_scaffold_output = {
# "generated_sequence": generated_sequence,
# "new_start_index": new_start_idx,
# "new_end_index": new_end_idx
# }
# return generated_scaffold_output
# Tab 1: unconditional generation. Wires a length slider and a model picker
# to make_uncond_seq and shows the result as text.
usg_app = gr.Interface(
    title="Unconditional sequence generation",
    description="Generate a sequence with `EvoDiff-Seq-OADM 38M` (smaller/faster) or `EvoDiff-D3PM-Uniform 38M` (larger/slower) models.",
    fn=make_uncond_seq,
    inputs=[
        gr.Slider(minimum=10, maximum=250, step=1, label="Sequence Length"),
        gr.Dropdown(
            choices=["EvoDiff-Seq-OADM 38M", "EvoDiff-D3PM-Uniform 38M"],
            value="EvoDiff-Seq-OADM 38M",
            type="value",
            label="Model",
        ),
    ],
    outputs=["text"],
)
# Tab 2: MSA-conditioned generation. Wires a length slider, an MSA upload,
# a sample count and a model picker to make_cond_seq.
csg_app = gr.Interface(
    fn=make_cond_seq,
    inputs=[
        gr.Slider(10, 250, label="Sequence Length"),
        # File extensions need a leading dot; a bare "a3m" is treated as a
        # file *category* (like "image"), not as an extension filter.
        gr.File(file_types=[".a3m"], label="MSA File"),
        gr.Number(value=64, precision=0, label="Number of Sequences to Sample"),
        gr.Dropdown(["EvoDiff-MSA"], value="EvoDiff-MSA", type="value", label="Model"),
    ],
    outputs=["text"],
    # examples=[["https://github.com/microsoft/evodiff/raw/main/examples/example_files/bfd_uniclust_hits.a3m"]],
    title="Conditional sequence generation",
    description="Evolutionary guided sequence generation with the `EvoDiff-MSA` model.",
)
# Tab 3: region inpainting. Wires a sequence box, start/end indices and a
# model picker to make_inpainted_idrs and shows the result as text.
idr_app = gr.Interface(
    title="Inpainting IDRs",
    description="Inpainting a new region inside a given sequence using the `EvoDiff-Seq` model.",
    fn=make_inpainted_idrs,
    inputs=[
        gr.Textbox(
            label="Sequence",
            value="DQTERTVRSFEGRRTAPYLDSRNVLTIGYGHLLNRPGANKSWEGRLTSALPREFKQRLTELAASQLHETDVRLATARAQALYGSGAYFESVPVSLNDLWFDSVFNLGERKLLNWSGLRTKLESRDWGAAAKDLGRHTFGREPVSRRMAESMRMRRGIDLNHYNI",
        ),
        gr.Number(label="Start Index", value=20, precision=0),
        gr.Number(label="End Index", value=50, precision=0),
        gr.Dropdown(
            choices=["EvoDiff-Seq"],
            value="EvoDiff-Seq",
            type="value",
            label="Model",
        ),
    ],
    outputs=["text"],
)
# scaffold_app = gr.Interface(
# fn=make_scaffold_motifs,
# inputs=[
# gr.Textbox(value="1prw", label = "PDB Code"),
# gr.Textbox(value="[15, 51]", label = "Start Index (as list)"),
# gr.Textbox(value="[34, 70]", label = "End Index (as list)"),
# gr.Number(value=75, precision=0, label = "Scaffold Length"),
# gr.Dropdown(["EvoDiff-Seq", "EvoDiff-MSA"], value="EvoDiff-Seq", type="value", label = "Model")
# ],
# outputs=["text"],
# title = "Scaffolding functional motifs",
# description="Scaffolding a new functional motif inside a given PDB structure using the `EvoDiff-Seq` model."
# )
# Page layout: a header row with the project blurb, then a row holding the
# three demo interfaces as tabs. The scaffolding tab is currently disabled.
with gr.Blocks() as edapp:
    with gr.Row():
        gr.Markdown(
            value="""
# EvoDiff
## Generation of protein sequences and evolutionary alignments via discrete diffusion models
Created By: Microsoft Research [Sarah Alamdari, Nitya Thakkar, Rianne van den Berg, Alex X. Lu, Nicolo Fusi, Ava P. Amini, and Kevin K. Yang]
Spaces App By: Tuple, The Cloud Genomics Company [Colby T. Ford]
<span style="color:red">Note: When you first run this app, the models will take a few minutes to download from Zenodo. Check the logs for the download status.</span>
"""
        )
    with gr.Row():
        gr.TabbedInterface(
            interface_list=[
                usg_app,
                csg_app,
                idr_app,
                # scaffold_app
            ],
            tab_names=[
                "Unconditional sequence generation",
                "Conditional generation",
                "Inpainting IDRs",
                # "Scaffolding functional motifs"
            ],
        )
# Start the Gradio app only when this file is executed directly.
# (The stray trailing "|" after launch() was a syntax error and is removed.)
if __name__ == "__main__":
    edapp.launch()