Spaces:
Running
Running
File size: 6,877 Bytes
76f2fb3 02970c0 db541e4 334043f 02970c0 4d5beeb f73076c 4d5beeb ed36f4f 02970c0 db541e4 334043f 6bc4f0e 7f5c48e 334043f 7f5c48e 334043f 7f5c48e 6bc4f0e 7f5c48e 6bc4f0e 7f5c48e 76f2fb3 334043f 7f5c48e 6bc4f0e ed36f4f 6bc4f0e ed36f4f 334043f ed36f4f 6bc4f0e ed36f4f 6bc4f0e ed36f4f 4455bd5 6b700d7 d14afa6 c7cc63a 334043f ed36f4f 6bc4f0e 02970c0 4d5beeb 6bc4f0e 4d5beeb 76f2fb3 4d5beeb 6bc4f0e 4d5beeb ff4e1a8 6bc4f0e 4d5beeb 76f2fb3 d92a3e6 4d5beeb affd796 ed36f4f 76f2fb3 ff4e1a8 6bc4f0e ed36f4f 76f2fb3 ed36f4f 334043f ed36f4f ff4e1a8 6bc4f0e ed36f4f 76f2fb3 ed36f4f affd796 2f14da2 f73076c ed36f4f 2f14da2 affd796 4f972b8 334043f 4f972b8 334043f 4f972b8 4d5beeb db541e4 affd796 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import re, os
from pathlib import Path
import gradio as gr
import torch
from evodiff.pretrained import OA_DM_38M, D3PM_UNIFORM_38M, MSA_OA_DM_MAXSUB
from evodiff.generate import generate_oaardm, generate_d3pm
from evodiff.generate_msa import generate_query_oadm_msa_simple
from evodiff.conditional_generation import inpaint_simple, generate_scaffold
def get_device():
    """Return the torch device string: "cuda" if CUDA is available, else "cpu"."""
    return "cuda" if torch.cuda.is_available() else "cpu"
def make_uncond_seq(seq_len, model_type):
    """Generate a single unconditional sequence with the selected EvoDiff model.

    Args:
        seq_len: Requested sequence length; coerced with ``int()``.
        model_type: Either ``"EvoDiff-Seq-OADM 38M"`` or
            ``"EvoDiff-D3PM-Uniform 38M"``.

    Returns:
        The second element of the tuple returned by the evodiff generator
        (the generated sequence output).

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
            (Previously this fell through to an unbound local variable.)
    """
    length = int(seq_len)

    if model_type == "EvoDiff-Seq-OADM 38M":
        model, _collater, tokenizer, _scheme = OA_DM_38M()
        _tokenized_sample, generated_sequence = generate_oaardm(
            model, tokenizer, length, batch_size=1, device=get_device()
        )
    elif model_type == "EvoDiff-D3PM-Uniform 38M":
        # return_all=True additionally yields the timestep count and the
        # Q / Q_bar values that generate_d3pm takes as arguments.
        model, _collater, tokenizer, _scheme, timestep, Q_bar, Q = D3PM_UNIFORM_38M(
            return_all=True
        )
        _tokenized_sample, generated_sequence = generate_d3pm(
            model, tokenizer, Q, Q_bar, timestep, length, batch_size=1, device=get_device()
        )
    else:
        raise ValueError(f"Unsupported model type: {model_type!r}")

    return generated_sequence
def make_cond_seq(seq_len, msa_file, n_sequences, model_type):
    """Generate a query sequence conditioned on an uploaded MSA file.

    Args:
        seq_len: Requested sequence length; coerced with ``int()``.
        msa_file: The uploaded MSA. Either an object exposing the file path
            as ``.name`` or a plain path string.
        n_sequences: Number of MSA sequences to sample; coerced with ``int()``.
        model_type: Must be ``"EvoDiff-MSA"``.

    Returns:
        The second element of the tuple returned by
        ``generate_query_oadm_msa_simple`` (the generated sequence output).

    Raises:
        ValueError: If ``model_type`` is unsupported or no MSA file was given.
            (Previously these cases surfaced as UnboundLocalError /
            AttributeError.)
    """
    if model_type != "EvoDiff-MSA":
        raise ValueError(f"Unsupported model type: {model_type!r}")
    if msa_file is None:
        raise ValueError("An MSA file is required for conditional generation.")

    # NOTE(review): depending on Gradio version/config the File input may be
    # a tempfile-like object (path in .name) or a bare path string; both work.
    msa_path = getattr(msa_file, "name", msa_file)

    model, _collater, tokenizer, _scheme = MSA_OA_DM_MAXSUB()
    print(f"MSA File Path: {msa_path}")
    _tokenized_sample, generated_sequence = generate_query_oadm_msa_simple(
        msa_path,
        model,
        tokenizer,
        int(n_sequences),
        seq_length=int(seq_len),
        device=get_device(),
        selection_type='random',
    )
    return generated_sequence
def make_inpainted_idrs(sequence, start_idx, end_idx, model_type):
    """Inpaint the region ``[start_idx:end_idx]`` of ``sequence`` with EvoDiff-Seq.

    Args:
        sequence: The input sequence string.
        start_idx: Start of the region to regenerate; coerced with ``int()``.
        end_idx: End of the region to regenerate; coerced with ``int()``.
        model_type: Must be ``"EvoDiff-Seq"``.

    Returns:
        A dict with keys ``original_sequence``, ``generated_sequence``,
        ``original_region`` and ``generated_region``.

    Raises:
        ValueError: If ``model_type`` is unsupported. (Previously this fell
            through to an unbound local variable.)
    """
    if model_type != "EvoDiff-Seq":
        raise ValueError(f"Unsupported model type: {model_type!r}")

    # Coerce once so the model call and the slice below use the same integer
    # bounds; slicing with a float index would raise TypeError.
    start, end = int(start_idx), int(end_idx)

    model, _collater, tokenizer, _scheme = OA_DM_38M()
    _sample, entire_sequence, generated_idr = inpaint_simple(
        model, sequence, start, end, tokenizer=tokenizer, device=get_device()
    )

    return {
        "original_sequence": sequence,
        "generated_sequence": entire_sequence,
        "original_region": sequence[start:end],
        "generated_region": generated_idr,
    }
def _parse_index_list(text):
    """Parse a bracketed, comma-separated string such as "[15, 51]" into ints."""
    return [int(item) for item in text.strip().strip("][").split(",")]


def make_scaffold_motifs(pdb_code, start_idx, end_idx, scaffold_length, model_type):
    """Generate a scaffold around motif regions of a PDB structure.

    Args:
        pdb_code: PDB identifier passed through to ``generate_scaffold``.
        start_idx: Motif start indices as text, e.g. ``"[15, 51]"``.
        end_idx: Motif end indices as text, e.g. ``"[34, 70]"``.
        scaffold_length: Scaffold length; coerced with ``int()``.
        model_type: Must be ``"EvoDiff-Seq"`` (the only model handled here).

    Returns:
        A dict with keys ``generated_sequence``, ``new_start_index`` and
        ``new_end_index``.

    Raises:
        ValueError: If ``model_type`` is unsupported, an index list cannot be
            parsed as integers, or the two lists differ in length.
    """
    # Validate before loading the checkpoint so bad input fails fast and with
    # a clear message instead of an UnboundLocalError at the return.
    if model_type != "EvoDiff-Seq":
        raise ValueError(f"Unsupported model type: {model_type!r}")

    starts = _parse_index_list(start_idx)
    ends = _parse_index_list(end_idx)
    if len(starts) != len(ends):
        raise ValueError(
            "Start and end index lists must have the same length "
            f"(got {len(starts)} and {len(ends)})."
        )

    model, _collater, tokenizer, _scheme = OA_DM_38M()

    # Directory handed to generate_scaffold as its data directory; created
    # up front so it exists before that call.
    data_top_dir = '/home/user/.cache/huggingface/datasets/'
    os.makedirs(data_top_dir, exist_ok=True)

    generated_sequence, new_start_idx, new_end_idx = generate_scaffold(
        model,
        pdb_code,
        starts,
        ends,
        int(scaffold_length),
        data_top_dir,
        tokenizer,
        device=get_device(),
    )

    return {
        "generated_sequence": generated_sequence,
        "new_start_index": new_start_idx,
        "new_end_index": new_end_idx,
    }
# Tab: unconditional generation. A length slider and a model picker are passed
# positionally to make_uncond_seq; the result is shown as text.
usg_app = gr.Interface(
    title="Unconditional sequence generation",
    description="Generate a sequence with `EvoDiff-Seq-OADM 38M` (smaller/faster) or `EvoDiff-D3PM-Uniform 38M` (larger/slower) models.",
    fn=make_uncond_seq,
    inputs=[
        gr.Slider(10, 250, step=1, label="Sequence Length"),
        gr.Dropdown(
            ["EvoDiff-Seq-OADM 38M", "EvoDiff-D3PM-Uniform 38M"],
            value="EvoDiff-Seq-OADM 38M",
            type="value",
            label="Model",
        ),
    ],
    outputs=["text"],
)
# Tab: MSA-conditioned generation. Inputs are passed positionally to
# make_cond_seq as (seq_len, msa_file, n_sequences, model_type).
csg_app = gr.Interface(
    fn=make_cond_seq,
    inputs=[
        gr.Slider(10, 250, label="Sequence Length"),
        # Extension filters need the leading dot (".a3m"); a bare "a3m" is
        # not treated as a file extension by Gradio's file_types handling.
        gr.File(file_types=[".a3m"], label="MSA File"),
        gr.Number(value=64, precision=0, label="Number of Sequences to Sample"),
        gr.Dropdown(["EvoDiff-MSA"], value="EvoDiff-MSA", type="value", label="Model"),
    ],
    outputs=["text"],
    # examples=[["https://github.com/microsoft/evodiff/raw/main/examples/example_files/bfd_uniclust_hits.a3m"]],
    title="Conditional sequence generation",
    description="Evolutionary guided sequence generation with the `EvoDiff-MSA` model.",
)
# Tab: IDR inpainting. Inputs are passed positionally to make_inpainted_idrs
# as (sequence, start_idx, end_idx, model_type); defaults give a ready example.
idr_app = gr.Interface(
    title="Inpainting IDRs",
    description="Inpainting a new region inside a given sequence using the `EvoDiff-Seq` model.",
    fn=make_inpainted_idrs,
    inputs=[
        gr.Textbox(
            label="Sequence",
            value="DQTERTVRSFEGRRTAPYLDSRNVLTIGYGHLLNRPGANKSWEGRLTSALPREFKQRLTELAASQLHETDVRLATARAQALYGSGAYFESVPVSLNDLWFDSVFNLGERKLLNWSGLRTKLESRDWGAAAKDLGRHTFGREPVSRRMAESMRMRRGIDLNHYNI",
        ),
        gr.Number(value=20, precision=0, label="Start Index"),
        gr.Number(value=50, precision=0, label="End Index"),
        gr.Dropdown(["EvoDiff-Seq"], value="EvoDiff-Seq", type="value", label="Model"),
    ],
    outputs=["text"],
)
# Tab: motif scaffolding. Inputs are passed positionally to
# make_scaffold_motifs as (pdb_code, start_idx, end_idx, scaffold_length,
# model_type). The index lists are entered as text such as "[15, 51]".
scaffold_app = gr.Interface(
    fn=make_scaffold_motifs,
    inputs=[
        gr.Textbox(value="1prw", label="PDB Code"),
        gr.Textbox(value="[15, 51]", label="Start Index (as list)"),
        gr.Textbox(value="[34, 70]", label="End Index (as list)"),
        gr.Number(value=75, precision=0, label="Scaffold Length"),
        # Only "EvoDiff-Seq" is offered: make_scaffold_motifs has no branch
        # for "EvoDiff-MSA", so selecting it could only end in an error.
        gr.Dropdown(["EvoDiff-Seq"], value="EvoDiff-Seq", type="value", label="Model"),
    ],
    outputs=["text"],
    title="Scaffolding functional motifs",
    description="Scaffolding a new functional motif inside a given PDB structure using the `EvoDiff-Seq` model.",
)
# Top-level page: a Markdown header row followed by one tab per interface.
with gr.Blocks() as edapp:
    with gr.Row():
        # Header text is built from single-line literals so the Markdown
        # carries no leading indentation. The author list reads
        # "Ava P. Amini" (a stray "Profile" prefix has been removed).
        gr.Markdown(
            "\n"
            "# EvoDiff\n"
            "## Generation of protein sequences and evolutionary alignments via discrete diffusion models\n"
            "Created By: Microsoft Research [Sarah Alamdari, Nitya Thakkar, Rianne van den Berg, Alex X. Lu, Nicolo Fusi, Ava P. Amini, and Kevin K. Yang]\n"
            "Spaces App By: Tuple, The Cloud Genomics Company [Colby T. Ford]\n"
        )
    with gr.Row():
        # The tab-name list is positional: entry i labels interface i.
        gr.TabbedInterface(
            [
                usg_app,
                csg_app,
                idr_app,
                scaffold_app,
            ],
            [
                "Unconditional sequence generation",
                "Conditional generation",
                "Inpainting IDRs",
                "Scaffolding functional motifs",
            ],
        )
# Launch the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    edapp.launch()