# TIPars-UI / app.py
# tracywong117's picture
# Change html
# 6dd949f
from tipars.plot_tree import plot_fig
import gradio as gr
import os
import pandas as pd
import subprocess
def get_sequence_example(example):
    """Return the bundled example FASTA text for an example label.

    Args:
        example: Label of the selected example. Labels starting with "H5"
            or "SARS-CoV-2" are recognised.

    Returns:
        The full contents of the matching FASTA file, or None when the label
        matches neither prefix.
    """
    if example.startswith("H5"):
        fasta_path = "tipars/ref-tree/Influenza-A-H5/H5.fasta"
    elif example.startswith("SARS-CoV-2"):
        fasta_path = "tipars/ref-tree/sars-cov-2/sars2.fasta"
    else:
        return None
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(fasta_path) as fasta_file:
        return fasta_file.read()
def mirror(a, b, c, d):
    """Return the four arguments unchanged, packed as a tuple.

    Used as the pass-through callback for the file examples, whose inputs and
    outputs are the same four components.
    """
    passthrough = (a, b, c, d)
    return passthrough
def _save_pasted_query(fasta_text):
    """Write pasted FASTA text to ``tipars/test.fasta`` and return that path."""
    if not fasta_text or not fasta_text.strip():
        raise gr.Error("Please enter at least one query sequence in FASTA format.")
    pasted_path = "tipars/test.fasta"
    with open(pasted_path, "w") as f:
        f.write(fasta_text)
    return pasted_path


def _run_to_file(command, destination):
    """Run *command* with its standard output redirected into *destination*.

    Raises gr.Error on a non-zero exit status so the failure is reported at
    the step that caused it rather than as a missing file further down.
    """
    with open(destination, "w") as output_file:
        completed = subprocess.run(command, stdout=output_file)
    if completed.returncode != 0:
        raise gr.Error(
            f"'{command[0]}' failed with exit status {completed.returncode}."
        )


def insertion(
    task,
    tipars_file1,
    tipars_file2,
    tipars_file3,
    tipars_file4,
    input_textbox1,
    input_textbox2,
):
    """Insert query sequences into a reference tree with TIPars and plot it.

    Steps: pick the query/tree/MSA/ancestral files for the task, append
    ``_query`` to every query header, align the queries to the MSA with MAFFT
    (``--add --keeplength``), pull the aligned query records back out with
    seqkit, run TIPars to write ``tipars/tipars.tree`` and hand that file to
    ``plot_fig``.

    Args:
        task: "tipars" (use the four uploaded files), "h5" or "sars" (use the
            bundled reference data with pasted query text).
        tipars_file1: Uploaded query sequences (read via ``.name``).
        tipars_file2: Uploaded rooted reference tree (read via ``.name``).
        tipars_file3: Uploaded tree MSA (read via ``.name``).
        tipars_file4: Uploaded ancestral alignment (read via ``.name``).
        input_textbox1: Pasted FASTA text used when task is "sars".
        input_textbox2: Pasted FASTA text used when task is "h5".

    Returns:
        The value returned by ``plot_fig`` for the generated tree file.

    Raises:
        gr.Error: If required input is missing, the task is unknown, MAFFT or
            seqkit exits with a non-zero status, or TIPars writes no tree.
    """
    output = "tipars/tipars.tree"
    # Remove the tree of a previous run so a failed run cannot show stale data.
    if os.path.exists(output):
        os.remove(output)

    if task == "tipars":
        uploads = (tipars_file1, tipars_file2, tipars_file3, tipars_file4)
        if any(upload is None for upload in uploads):
            raise gr.Error(
                "Please provide the query, tree, MSA and ancestral sequence files."
            )
        print(
            tipars_file1.name, tipars_file2.name, tipars_file3.name, tipars_file4.name
        )
        query = tipars_file1.name
        tree = tipars_file2.name
        msa = tipars_file3.name
        anc = tipars_file4.name
    elif task == "h5":
        query = _save_pasted_query(input_textbox2)
        tree = "tipars/ref-tree/Influenza-A-H5/tree.nwk"
        msa = "tipars/ref-tree/Influenza-A-H5/taxa.fasta"
        anc = "tipars/ref-tree/Influenza-A-H5/anc.fas"
    elif task == "sars":
        query = _save_pasted_query(input_textbox1)
        tree = "tipars/ref-tree/sars-cov-2/sars-cov-2_ready.tree"
        msa = "tipars/ref-tree/sars-cov-2/sars-cov-2_taxa.fasta"
        anc = "tipars/ref-tree/sars-cov-2/sars-cov-2_anc.fas"
    else:
        # Previously an unknown task fell through to an UnboundLocalError.
        raise gr.Error(f"Unknown task: {task!r}")

    # Copy the query FASTA, appending "_query" to every header line so the
    # inserted sequences carry a distinct name.
    with open(query, "r") as f:
        lines = f.readlines()
    query = "tipars/query.fasta"
    with open(query, "w") as f:
        for line in lines:
            if line.startswith(">"):
                f.write(line.strip() + "_query\n")
            else:
                f.write(line)

    # Align the queries against the reference MSA without changing its length.
    _run_to_file(
        ["mafft-linux64/mafft.bat", "--add", query, "--keeplength", msa],
        "mafft_output.fas",
    )
    # List the query names, then keep only those records from the alignment;
    # the aligned records overwrite the query file.
    # NOTE(review): `seqkit seq -n` prints full headers while `seqkit grep`
    # matches on IDs unless -n is given; headers containing whitespace may not
    # be matched. Confirm against real input.
    _run_to_file(["./seqkit", "seq", "-n", query], "query-name.txt")
    _run_to_file(
        ["./seqkit", "grep", "-f", "query-name.txt", "mafft_output.fas"], query
    )

    subprocess.run(
        [
            "tipars/tipars",
            "-t",
            tree,
            "-s",
            msa,
            "-a",
            anc,
            "-q",
            query,
            "-o",
            output,
        ]
    )
    # The stale tree was removed above, so a missing file means this run failed.
    if not os.path.exists(output):
        raise gr.Error("TIPars did not produce an output tree; check the input files.")
    fig = plot_fig(output)
    return fig
# Custom stylesheet passed to gr.Blocks(css=...): centers level-1 headings.
css = """
h1 {
text-align: center;
display:block;
}
"""
# Markdown snippets shown in the UI. They live at module level so the text
# stays unindented and the repeated "Query Sequences" blurb is defined once.
_TITLE_MD = """# Phylogenetic Placement by TIPars
<center>
This is HuggingFace hosting of <a href=https://github.com/id-bioinfo/TIPars>TIPars</a>.
</center>
"""
_INPUT_MD = """## Input
Input your query sequences.
"""
_QUERY_MD = """
### Query Sequences
Sequences to be insert to Reference Tree (.fasta)
"""
_TREE_MD = """
### Rooted Reference Tree
The tree which new sample shall be inserted to (.nwk)
"""
_MSA_MD = """
### Tree MSA (Multiple Sequence Alignment)
The multiple sequence alignment of taxa that built the Reference Tree (.fasta)
"""
_ANC_MD = """
### Ancestral Sequence Alignment
Ancestral sequence (aligned) obtained from PastML or TreeTime (.fasta)
"""
_OUTPUT_MD = """## Output
TIPars insertion result:
"""

# Gradio expects file extensions with a leading dot (e.g. ".json"); bare words
# are interpreted as media categories such as "image" or "audio".
_FASTA_FILE_TYPES = (".fasta", ".fas", ".fna", ".ffn", ".faa", ".frn", ".fa")
_TREE_FILE_TYPES = (".nwk", ".new", ".tree")


def _bundled_path(relative_path):
    """Return the path of a file shipped next to this script."""
    return os.path.join(os.path.dirname(__file__), relative_path)


# NOTE(review): the nesting below was reconstructed from a listing without
# indentation - confirm the Run button belongs to the input column.
with gr.Blocks(css=css) as demo:
    gr.Image(
        "tipars/tipars.svg",
        width=50,
        height=50,
        min_width=0,
        container=False,
        show_download_button=False,
    )
    # Tracks the active tab so the Run handler knows which workflow to use;
    # the initial value matches the first tab.
    task_selected = gr.State("sars")
    gr.Markdown(_TITLE_MD)
    with gr.Row():
        with gr.Column():
            gr.Markdown(_INPUT_MD)
            with gr.Tab("SARS-Cov-2 Full Genome Tree") as sars_tab:
                gr.Markdown(_QUERY_MD)
                input_textbox1 = gr.Textbox(label="Query Sequences")
                gr.Examples(
                    examples=[
                        ["SARS-CoV-2"],
                    ],
                    inputs=input_textbox1,
                    fn=get_sequence_example,
                    cache_examples=True,
                    outputs=input_textbox1,
                )
            with gr.Tab("Influenza-A-H5 HA Tree") as h5_tab:
                gr.Markdown(_QUERY_MD)
                input_textbox2 = gr.Textbox(label="Query Sequences")
                gr.Examples(
                    examples=[["H5"]],
                    inputs=input_textbox2,
                    fn=get_sequence_example,
                    cache_examples=True,
                    outputs=input_textbox2,
                )
            with gr.Tab("TIPars") as tipars_tab:
                gr.Markdown(_QUERY_MD)
                tipars_file1 = gr.File(
                    label="Query Sequences",
                    file_types=list(_FASTA_FILE_TYPES),
                )
                gr.Markdown(_TREE_MD)
                tipars_file2 = gr.File(
                    label="Rooted Reference Tree",
                    file_types=list(_TREE_FILE_TYPES),
                )
                gr.Markdown(_MSA_MD)
                tipars_file3 = gr.File(
                    label="Tree MSA (Multiple Sequence Alignment)",
                    file_types=list(_FASTA_FILE_TYPES),
                )
                gr.Markdown(_ANC_MD)
                tipars_file4 = gr.File(
                    label="Ancestral Sequence Alignment",
                    file_types=list(_FASTA_FILE_TYPES),
                )
                gr.Examples(
                    examples=[
                        [
                            _bundled_path("tipars/Benchmark_datasets/NDV/NDV_query.fas"),
                            _bundled_path("tipars/Benchmark_datasets/NDV/NDV_tree.nwk"),
                            _bundled_path("tipars/Benchmark_datasets/NDV/NDV_taxa.fas"),
                            _bundled_path("tipars/Benchmark_datasets/NDV/NDV_anc.fas"),
                        ]
                    ],
                    inputs=[tipars_file1, tipars_file2, tipars_file3, tipars_file4],
                    fn=mirror,
                    # cache_examples=True,
                    outputs=[tipars_file1, tipars_file2, tipars_file3, tipars_file4],
                )
            btn = gr.Button("Run")
        with gr.Column():
            gr.Markdown(_OUTPUT_MD)
            charts = gr.Plot(label="Tree")
    # Selecting a tab records its task key in the shared state.
    h5_tab.select(lambda: "h5", inputs=None, outputs=task_selected)
    sars_tab.select(lambda: "sars", inputs=None, outputs=task_selected)
    tipars_tab.select(lambda: "tipars", inputs=None, outputs=task_selected)
    btn.click(
        fn=insertion,
        inputs=[
            task_selected,
            tipars_file1,
            tipars_file2,
            tipars_file3,
            tipars_file4,
            input_textbox1,
            input_textbox2,
        ],
        outputs=[charts],
    )
if __name__ == "__main__":
    # Serve the app on all network interfaces, port 7860, when run as a script.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)