Spaces:

LofiAmazon
/

LofiAmazonSpace

Sleeping

App Files Files Community

jennzhuge committed on Jun 2, 2024

Commit

89a88ac

1 Parent(s): a0e49d5

hi

Browse files

Files changed (1) hide show

app.py +62 -16

app.py CHANGED Viewed

@@ -1,9 +1,12 @@
 import json
 import pandas as pd
 import gradio as gr
 # from transformers import PreTrainedTokenizerFast, BertForMaskedLM
 from datasets import load_dataset
 import infer
 with open("default_inputs.json", "r") as default_inputs_file:
     DEFAULT_INPUTS = json.load(default_inputs_file)
@@ -42,6 +45,8 @@ def preprocess():
 def predict_genus():
     data = preprocess()
     out = infer.infer_dna(data)
     results = []
@@ -54,35 +59,73 @@ def predict_genus():
     return results
-def tsne():
-    return plots
 with gr.Blocks() as demo:
     # Header section
     gr.Markdown("# DNA Identifier Tool")
-    gr.Markdown("Welcome to Lofi Amazon Beats' DNA Identifier Tool")
     with gr.Tab("Genus Prediction"):
-        gr.Markdown("Enter a DNA sequence and the coordinates at which its sample was taken to get a genus prediction. Click 'I'm feeling lucky' to see a prediction for a random sequence.")
         # Collect inputs for app (DNA and location)
-        with gr.Row():
-            with gr.Column():
-                inp_dna = gr.Textbox(label="DNA", placeholder="e.g. AACAATGTA... (min 200 and max 660 characters)")
-            with gr.Column():
-                with gr.Row():
-                    inp_lat = gr.Textbox(label="Latitude", placeholder="e.g. -3.009083")
-                with gr.Row():
-                    inp_lng = gr.Textbox(label="Longitude", placeholder="e.g. -58.68281")
-        with gr.Row():
-            btn_run = gr.Button("Predict")
-            btn_defaults = gr.Button("I'm feeling lucky")
-            btn_defaults.click(fn=set_default_inputs, outputs=[inp_dna, inp_lat, inp_lng])
         with gr.Row():
             gr.Markdown('Make plot or table for Top 5 species')
@@ -97,6 +140,9 @@ with gr.Blocks() as demo:
         with gr.Row() as row:
             with gr.Column():
                 gr.Markdown("Plot of your DNA sequence among other known species clusters.")
             with gr.Column():
                 gr.Markdown("Plot of the five most common species at your sample coordinate.")

 import json
 import pandas as pd
+import numpy as np
 import gradio as gr
 # from transformers import PreTrainedTokenizerFast, BertForMaskedLM
 from datasets import load_dataset
 import infer
+import matplotlib.pyplot as plt
+from sklearn.manifold import TSNE
 with open("default_inputs.json", "r") as default_inputs_file:
     DEFAULT_INPUTS = json.load(default_inputs_file)
 def predict_genus():
     data = preprocess()
     out = infer.infer_dna(data)
     results = []
     return results
+def tsne_DNA(data, genuses):
+    """Build a 2-D t-SNE scatter plot of embeddings for the most common genera.
+
+    Args:
+        data: DataFrame with an "embeddings" column of strings holding
+            whitespace-separated floats wrapped in one leading and one
+            trailing character (presumably "[...]" -- confirm against the
+            dataset), and a "genus" column.
+        genuses: Currently unused; kept so existing callers keep working.
+
+    Returns:
+        The matplotlib figure containing the scatter plot.
+    """
+    # Work on a copy: parsing in place would modify the caller's DataFrame
+    # and make a second call on the same frame fail on already-parsed arrays.
+    df = data.copy()
+    df["embeddings"] = df["embeddings"].apply(
+        lambda x: np.array(list(map(float, x[1:-1].split())))
+    )
+    # Pick genuses with most samples
+    top_k = 5
+    top_genuses = df["genus"].value_counts().head(top_k).index
+    df = df[df["genus"].isin(top_genuses)]
+    # Create a t-SNE plot of the embeddings
+    X = np.stack(df["embeddings"].tolist())
+    y = df["genus"].tolist()
+    # scikit-learn requires perplexity to be smaller than the sample count.
+    perplexity = min(30, len(X) - 1)
+    tsne = TSNE(n_components=2, perplexity=perplexity, learning_rate=200, n_iter=1000, random_state=0)
+    X_tsne = tsne.fit_transform(X)
+    # Integer-code the genus labels (sorted unique order) for colouring,
+    # using numpy so no additional import is required.
+    _, y_encoded = np.unique(y, return_inverse=True)
+    plot = plt.figure(figsize=(6, 5))
+    plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y_encoded, cmap="viridis", alpha=0.7)
+    return plot
 with gr.Blocks() as demo:
     # Header section
     gr.Markdown("# DNA Identifier Tool")
+    gr.Markdown("Welcome to Lofi Amazon Beats' DNA Identifier Tool. Please enter a DNA sequence and the coordinates at which its sample was taken to get started. Click 'I'm feeling lucky' to see use a random sequence.")
+    with gr.Row():
+        with gr.Column():
+            inp_dna = gr.Textbox(label="DNA", placeholder="e.g. AACAATGTA... (min 200 and max 660 characters)")
+        with gr.Column():
+            with gr.Row():
+                inp_lat = gr.Textbox(label="Latitude", placeholder="e.g. -3.009083")
+            with gr.Row():
+                inp_lng = gr.Textbox(label="Longitude", placeholder="e.g. -58.68281")
+    with gr.Row():
+        btn_run = gr.Button("Predict")
+        btn_defaults = gr.Button("I'm feeling lucky")
+        btn_defaults.click(fn=set_default_inputs, outputs=[inp_dna, inp_lat, inp_lng])
     with gr.Tab("Genus Prediction"):
+        # gr.Markdown("Enter a DNA sequence and the coordinates at which its sample was taken to get a genus prediction. Click 'I'm feeling lucky' to see a prediction for a random sequence.")
         # Collect inputs for app (DNA and location)
+        # with gr.Row():
+        #     with gr.Column():
+        #         inp_dna = gr.Textbox(label="DNA", placeholder="e.g. AACAATGTA... (min 200 and max 660 characters)")
+        #     with gr.Column():
+        #         with gr.Row():
+        #             inp_lat = gr.Textbox(label="Latitude", placeholder="e.g. -3.009083")
+        #         with gr.Row():
+        #             inp_lng = gr.Textbox(label="Longitude", placeholder="e.g. -58.68281")
+        # with gr.Row():
+        #     btn_run = gr.Button("Predict")
+        #     btn_defaults = gr.Button("I'm feeling lucky")
+        #     btn_defaults.click(fn=set_default_inputs, outputs=[inp_dna, inp_lat, inp_lng])
         with gr.Row():
             gr.Markdown('Make plot or table for Top 5 species')
         with gr.Row() as row:
             with gr.Column():
                 gr.Markdown("Plot of your DNA sequence among other known species clusters.")
+                plot = gr.Plot("")
+                btn_run.click(fn=tsne_DNA, inputs=[inp_dna, genus_out])
             with gr.Column():
                 gr.Markdown("Plot of the five most common species at your sample coordinate.")