File size: 7,061 Bytes
76f2fb3
02970c0
db541e4
56bb9b9
334043f
02970c0
4d5beeb
f73076c
4d5beeb
ed36f4f
02970c0
4a7c05c
334043f
4a7c05c
6bc4f0e
7f5c48e
 
 
4a7c05c
7f5c48e
 
 
 
4a7c05c
7f5c48e
6bc4f0e
7f5c48e
6bc4f0e
7f5c48e
 
 
76f2fb3
4a7c05c
7f5c48e
6bc4f0e
ed36f4f
6bc4f0e
ed36f4f
 
 
4a7c05c
ed36f4f
 
 
 
 
 
 
 
6bc4f0e
ed36f4f
ac7c6bd
 
 
 
 
 
 
 
 
4a7c05c
ac7c6bd
 
 
 
 
 
 
 
02970c0
4d5beeb
 
 
6bc4f0e
 
4d5beeb
76f2fb3
4d5beeb
 
 
 
 
 
 
6bc4f0e
4d5beeb
ff4e1a8
6bc4f0e
4d5beeb
76f2fb3
d92a3e6
4d5beeb
 
 
affd796
ed36f4f
 
 
76f2fb3
 
ff4e1a8
 
6bc4f0e
ed36f4f
76f2fb3
ed36f4f
334043f
ed36f4f
 
ac7c6bd
 
 
 
 
 
 
 
 
 
 
 
 
affd796
 
2f14da2
 
 
 
f73076c
 
ac7c6bd
f73076c
ed36f4f
f257a1d
 
2f14da2
 
affd796
4f972b8
 
 
ac7c6bd
 
4f972b8
 
 
 
ac7c6bd
 
4f972b8
4d5beeb
 
db541e4
affd796
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import re, os
from pathlib import Path
import gradio as gr
import spaces
import torch

from evodiff.pretrained import OA_DM_38M, D3PM_UNIFORM_38M, MSA_OA_DM_MAXSUB
from evodiff.generate import generate_oaardm, generate_d3pm
from evodiff.generate_msa import generate_query_oadm_msa_simple
from evodiff.conditional_generation import inpaint_simple, generate_scaffold

# Pick the torch device string once at import time: use CUDA when PyTorch
# reports it as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# NOTE(review): `spaces.GPU()` presumably requests a GPU for the duration of
# the call on Hugging Face Spaces — confirm against the `spaces` package docs.
@spaces.GPU()
def make_uncond_seq(seq_len, model_type):
    """Generate a sequence unconditionally with the selected EvoDiff model.

    Args:
        seq_len: Requested sequence length; coerced with ``int()``.
        model_type: ``"EvoDiff-Seq-OADM 38M"`` or
            ``"EvoDiff-D3PM-Uniform 38M"``.

    Returns:
        The second element of the tuple returned by the EvoDiff generator
        (``generate_oaardm`` or ``generate_d3pm``), passed through unchanged.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
            Previously this fell through to an unbound local variable.
    """
    # Coerce up front so a bad length fails before any checkpoint is loaded.
    seq_len = int(seq_len)

    if model_type == "EvoDiff-Seq-OADM 38M":
        model, _collater, tokenizer, _scheme = OA_DM_38M()
        _, generated_sequence = generate_oaardm(
            model, tokenizer, seq_len, batch_size=1, device=device
        )
    elif model_type == "EvoDiff-D3PM-Uniform 38M":
        # return_all=True additionally yields the timestep count and the
        # Q / Q_bar objects that generate_d3pm takes as arguments.
        (model, _collater, tokenizer, _scheme,
         timestep, Q_bar, Q) = D3PM_UNIFORM_38M(return_all=True)
        _, generated_sequence = generate_d3pm(
            model, tokenizer, Q, Q_bar, timestep, seq_len,
            batch_size=1, device=device,
        )
    else:
        raise ValueError(
            f"Unsupported model type: {model_type!r}. Expected "
            "'EvoDiff-Seq-OADM 38M' or 'EvoDiff-D3PM-Uniform 38M'."
        )

    return generated_sequence

def make_cond_seq(seq_len, msa_file, n_sequences, model_type):
    """Generate a sequence conditioned on an uploaded MSA file.

    Args:
        seq_len: Requested sequence length; coerced with ``int()``.
        msa_file: The uploaded MSA. Either a path (``str`` / ``os.PathLike``)
            or an object exposing the path as ``.name`` (e.g. a temp-file
            wrapper). Which one Gradio passes depends on its version and the
            ``gr.File`` ``type`` setting, so both are accepted.
        n_sequences: Number of sequences to sample from the MSA; coerced with
            ``int()``.
        model_type: Must be ``"EvoDiff-MSA"``.

    Returns:
        The second element of the tuple returned by
        ``generate_query_oadm_msa_simple``, passed through unchanged.

    Raises:
        ValueError: If ``model_type`` is unsupported or no MSA file was
            provided. Previously these cases surfaced as an unbound local
            variable or an ``AttributeError`` on ``None``.
    """
    # Validate cheap preconditions before loading the checkpoint.
    if model_type != "EvoDiff-MSA":
        raise ValueError(
            f"Unsupported model type: {model_type!r}. Expected 'EvoDiff-MSA'."
        )
    if msa_file is None:
        raise ValueError("An MSA file (.a3m) is required for conditional generation.")

    if isinstance(msa_file, (str, os.PathLike)):
        msa_path = os.fspath(msa_file)
    else:
        msa_path = msa_file.name

    model, _collater, tokenizer, _scheme = MSA_OA_DM_MAXSUB()
    print(f"MSA File Path: {msa_path}")
    _, generated_sequence = generate_query_oadm_msa_simple(
        msa_path,
        model,
        tokenizer,
        int(n_sequences),
        seq_length=int(seq_len),
        device=device,
        selection_type='random',
    )

    return generated_sequence
    
def make_inpainted_idrs(sequence, start_idx, end_idx, model_type):
    """Regenerate the region ``[start_idx, end_idx)`` of ``sequence``.

    Args:
        sequence: Input sequence string.
        start_idx: Start of the region to inpaint; coerced with ``int()``.
        end_idx: End of the region to inpaint; coerced with ``int()``.
        model_type: Must be ``"EvoDiff-Seq"``.

    Returns:
        A dict with the keys ``"original_sequence"``, ``"generated_sequence"``,
        ``"original_region"`` (the slice ``sequence[start:end]``) and
        ``"generated_region"``. The generated values are the second and third
        elements returned by ``inpaint_simple``.

    Raises:
        ValueError: If ``model_type`` is unsupported, ``sequence`` is empty,
            ``start_idx`` is negative, or ``end_idx`` is less than
            ``start_idx``.
    """
    # Validate everything cheap before loading the checkpoint.
    if model_type != "EvoDiff-Seq":
        raise ValueError(
            f"Unsupported model type: {model_type!r}. Expected 'EvoDiff-Seq'."
        )
    if not sequence:
        raise ValueError("A non-empty input sequence is required.")

    # Cast once so the model call and the reported slice use the same integer
    # bounds (slicing with a float would raise TypeError).
    start, end = int(start_idx), int(end_idx)
    if start < 0 or end < start:
        raise ValueError(
            f"Invalid region: start index {start} and end index {end} must "
            "satisfy 0 <= start <= end."
        )

    model, _collater, tokenizer, _scheme = OA_DM_38M()
    _sample, entire_sequence, generated_idr = inpaint_simple(
        model, sequence, start, end, tokenizer=tokenizer, device=device
    )

    return {
        "original_sequence": sequence,
        "generated_sequence": entire_sequence,
        "original_region": sequence[start:end],
        "generated_region": generated_idr,
    }
    
# def make_scaffold_motifs(pdb_code, start_idx, end_idx, scaffold_length, model_type):
#     if model_type == "EvoDiff-Seq":
#         checkpoint = OA_DM_38M()
#         model, collater, tokenizer, scheme = checkpoint
#         data_top_dir = '/home/user/.cache/huggingface/datasets/'
#         os.makedirs(data_top_dir, exist_ok=True)
#         # print("Folders in User Cache Directory:", os.listdir("/home/user/.cache"))
#         start_idx = list(map(int, start_idx.strip('][').split(',')))
#         end_idx = list(map(int, end_idx.strip('][').split(',')))
#         generated_sequence, new_start_idx, new_end_idx = generate_scaffold(model, pdb_code, start_idx, end_idx, scaffold_length, data_top_dir, tokenizer, device=device)

#         generated_scaffold_output = {
#             "generated_sequence": generated_sequence,
#             "new_start_index": new_start_idx,
#             "new_end_index": new_end_idx
#         }

#     return generated_scaffold_output

# Tab 1: unconditional generation. A length slider and a model dropdown feed
# `make_uncond_seq`; its return value is shown in a text output.
usg_app = gr.Interface(
    title="Unconditional sequence generation",
    description="Generate a sequence with `EvoDiff-Seq-OADM 38M` (smaller/faster) or `EvoDiff-D3PM-Uniform 38M` (larger/slower) models.",
    fn=make_uncond_seq,
    inputs=[
        gr.Slider(minimum=10, maximum=250, step=1, label="Sequence Length"),
        gr.Dropdown(
            choices=["EvoDiff-Seq-OADM 38M", "EvoDiff-D3PM-Uniform 38M"],
            value="EvoDiff-Seq-OADM 38M",
            type="value",
            label="Model",
        ),
    ],
    outputs=["text"],
)

# Tab 2: MSA-conditioned generation. The form collects a sequence length, an
# uploaded MSA file, the number of MSA sequences to sample and a model name,
# and passes them to `make_cond_seq`; the result is shown as text.
csg_app = gr.Interface(
            fn=make_cond_seq,
            inputs=[
                # step=1 matches the slider on the unconditional tab.
                gr.Slider(10, 250, step=1, label = "Sequence Length"),
                # Extension filters need a leading dot (".a3m"); a bare "a3m"
                # is not a documented extension or media-type value.
                gr.File(file_types=[".a3m"], label = "MSA File"),
                gr.Number(value=64, precision=0, label = "Number of Sequences to Sample"),
                gr.Dropdown(["EvoDiff-MSA"], value="EvoDiff-MSA", type="value", label = "Model")
                ],
            outputs=["text"],
            # examples=[["https://github.com/microsoft/evodiff/raw/main/examples/example_files/bfd_uniclust_hits.a3m"]], 
            title = "Conditional sequence generation",
            description="Evolutionary guided sequence generation with the `EvoDiff-MSA` model."
            )

# Tab 3: IDR inpainting. The form collects a sequence, integer start/end
# indices and a model name, and hands them to `make_inpainted_idrs`; the
# returned value is shown in a text output.
idr_app = gr.Interface(
    title="Inpainting IDRs",
    description="Inpainting a new region inside a given sequence using the `EvoDiff-Seq` model.",
    fn=make_inpainted_idrs,
    inputs=[
        gr.Textbox(
            label="Sequence",
            value="DQTERTVRSFEGRRTAPYLDSRNVLTIGYGHLLNRPGANKSWEGRLTSALPREFKQRLTELAASQLHETDVRLATARAQALYGSGAYFESVPVSLNDLWFDSVFNLGERKLLNWSGLRTKLESRDWGAAAKDLGRHTFGREPVSRRMAESMRMRRGIDLNHYNI",
        ),
        gr.Number(label="Start Index", value=20, precision=0),
        gr.Number(label="End Index", value=50, precision=0),
        gr.Dropdown(
            choices=["EvoDiff-Seq"],
            label="Model",
            value="EvoDiff-Seq",
            type="value",
        ),
    ],
    outputs=["text"],
)

# scaffold_app = gr.Interface(
#             fn=make_scaffold_motifs,
#             inputs=[
#                 gr.Textbox(value="1prw", label = "PDB Code"),
#                 gr.Textbox(value="[15, 51]", label = "Start Index (as list)"),
#                 gr.Textbox(value="[34, 70]", label = "End Index (as list)"),
#                 gr.Number(value=75, precision=0, label = "Scaffold Length"),
#                 gr.Dropdown(["EvoDiff-Seq", "EvoDiff-MSA"], value="EvoDiff-Seq", type="value", label = "Model")
#                 ],
#             outputs=["text"],
#             title = "Scaffolding functional motifs",
#             description="Scaffolding a new functional motif inside a given PDB structure using the `EvoDiff-Seq` model."
#             )

# Page layout: a header row with the project blurb, followed by a row that
# hosts the three generation tools as tabs.
with gr.Blocks() as edapp:
    with gr.Row():
        gr.Markdown(
            """
            # EvoDiff
            ## Generation of protein sequences and evolutionary alignments via discrete diffusion models

            Created By: Microsoft Research [Sarah Alamdari, Nitya Thakkar, Rianne van den Berg, Alex X. Lu, Nicolo Fusi, Ava P. Amini, and Kevin K. Yang]
            
            Spaces App By: Tuple, The Cloud Genomics Company [Colby T. Ford]

            <span style="color:red">Note: When you first run this app, the models will take a few minutes to download from Zenodo. Check the logs for the download status.</span>
            """
        )
    with gr.Row():
        # Each entry pairs a tab label with the Interface displayed under it,
        # so the two lists handed to TabbedInterface cannot drift apart.
        _tabs = [
            ("Unconditional sequence generation", usg_app),
            ("Conditional generation", csg_app),
            ("Inpainting IDRs", idr_app),
            # ("Scaffolding functional motifs", scaffold_app),
        ]
        gr.TabbedInterface(
            interface_list=[tab_app for _, tab_app in _tabs],
            tab_names=[tab_label for tab_label, _ in _tabs],
        )



# Launch the Blocks app only when this file is run as a script, not on import.
if __name__ == "__main__":
    edapp.launch()