Hack90 committed on
Commit
b18e1b5
·
verified ·
1 Parent(s): 569db9f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -15
app.py CHANGED
@@ -23,15 +23,9 @@ mpl.rcParams.update(mpl.rcParamsDefault)
23
  df = pd.read_parquet('virus_ds.parquet')
24
  virus = df['Organism_Name'].unique()
25
  virus = {v: v for v in virus}
26
- df_new = pd.read_parquet("virus.parquet", columns= ['organism_name'])
27
- df_new = df_new.groupby('organism_name').apply(lambda x: x.head(100) if len(x) > 10 else None).reset_index(drop=True)
28
- filter_species = df_new.organism_name.value_counts().reset_index()[df_new.organism_name.value_counts().reset_index()['count'] > 40 ]['organism_name'][1:].tolist()
29
-
30
- df_old = pd.read_parquet("virus.parquet", columns =['seq', 'organism_name'])
31
- MASTER_DF = df_old[df_old['organism_name'].isin(filter_species)].copy()
32
- del df_new
33
- del df_old
34
- virus_new = {v: v for v in filter_species}
35
  loss_typesss = pd.read_csv("training_data_5.csv")['loss_type'].unique().tolist()
36
  model_typesss = pd.read_csv("training_data_5.csv")['model_type'].unique().tolist()
37
  param_typesss = pd.read_csv("training_data_5.csv")['param_type'].unique().tolist()
@@ -82,19 +76,15 @@ with ui.navset_card_tab(id="tab"):
82
  return plot_persistence_homology(filtered_df["Sequence"], filtered_df["Organism_Name"])
83
 
84
  with ui.nav_panel("Viral Genome Distributions"):
85
- ui.panel_title("How does sequence distribution vary across sequence length?")
86
  with ui.layout_columns():
87
  with ui.card():
88
  ui.input_selectize("virus_selector_1", "Select your viruses:", virus_new, multiple=True, selected=None)
89
- with ui.card():
90
- ui.input_slider(
91
- "basepair","Select basepair",0, 10000, 15
92
- )
93
 
94
  @render.plot()
95
  def plot_distro():
96
  df = MASTER_DF[MASTER_DF["organism_name"].isin(input.virus_selector_1())].copy()
97
- grouped = df.groupby("organism_name")["seq"].apply(list)
98
  return plot_distrobutions(grouped, grouped.index, input.basepair())
99
 
100
  with ui.nav_panel("Viral Microstructure"):
 
23
  df = pd.read_parquet('virus_ds.parquet')
24
  virus = df['Organism_Name'].unique()
25
  virus = {v: v for v in virus}
26
+ df_new = pd.read_parquet("distro.parquet", columns= ['organism_name']).tolist()
27
+ MASTER_DF = pd.read_parquet("distro.parquet")
28
+ virus_new = {v: v for v in df_new}
 
 
 
 
 
 
29
  loss_typesss = pd.read_csv("training_data_5.csv")['loss_type'].unique().tolist()
30
  model_typesss = pd.read_csv("training_data_5.csv")['model_type'].unique().tolist()
31
  param_typesss = pd.read_csv("training_data_5.csv")['param_type'].unique().tolist()
 
76
  return plot_persistence_homology(filtered_df["Sequence"], filtered_df["Organism_Name"])
77
 
78
  with ui.nav_panel("Viral Genome Distributions"):
79
+ ui.panel_title("How does sequence distribution vary for a specie?")
80
  with ui.layout_columns():
81
  with ui.card():
82
  ui.input_selectize("virus_selector_1", "Select your viruses:", virus_new, multiple=True, selected=None)
 
 
 
 
83
 
84
  @render.plot()
85
  def plot_distro():
86
  df = MASTER_DF[MASTER_DF["organism_name"].isin(input.virus_selector_1())].copy()
87
+ ax = sns.histplot(data=df, x='charts', hue='organism_name')
88
  return plot_distrobutions(grouped, grouped.index, input.basepair())
89
 
90
  with ui.nav_panel("Viral Microstructure"):