charlieoneill committed on
Commit
3187d23
1 Parent(s): 5df6c06
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +1 -129
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ data/
app.py CHANGED
@@ -55,8 +55,6 @@ def download_all_files():
55
 
56
  # Load configuration and initialize OpenAI client
57
  download_all_files()
58
- # config = yaml.safe_load(open('../config.yaml', 'r'))
59
- # client = OpenAI(api_key=config['jwu_openai_key'])
60
 
61
  # Load the API key from the environment variable
62
  api_key = os.getenv('openai_key')
@@ -100,10 +98,6 @@ def load_subject_data(subject):
100
  decoder = weights['decoder.weight'].cpu().numpy()
101
  del weights
102
 
103
- # # Load feature families
104
- # with open(families_path, 'r') as f:
105
- # feature_families = json.load(f)
106
-
107
  with open(family_analysis_path, 'r') as f:
108
  family_analysis = json.load(f)
109
 
@@ -533,11 +527,6 @@ def create_interface():
533
  visualize_button = gr.Button("Visualize Feature")
534
 
535
  feature_info = gr.Markdown()
536
- # abstracts_heading = gr.Markdown("## Top 5 Abstracts")
537
- # top_abstracts = gr.Dataframe(
538
- # headers=["Title", "Activation value"],
539
- # interactive=False
540
- # )
541
 
542
  abstracts_heading = gr.Markdown("## Top 5 Abstracts")
543
  top_abstracts = gr.Dataframe(
@@ -597,46 +586,6 @@ def create_interface():
597
  inputs=[feature_matches, subject],
598
  outputs=[feature_info, top_abstracts, top_correlated, bottom_correlated, co_occurring_features, activation_dist, feature_search, feature_matches]
599
  )
600
- # with gr.Row():
601
- # feature_search = gr.Textbox(label="Search Feature Labels")
602
- # feature_matches = gr.CheckboxGroup(label="Matching Features", choices=[])
603
- # visualize_button = gr.Button("Visualize Feature")
604
-
605
- # feature_info = gr.Markdown()
606
-
607
- # abstracts_heading = gr.Markdown("## Top 5 Abstracts")
608
- # top_abstracts = gr.Dataframe(
609
- # headers=["Title", "Activation value"],
610
- # datatype=["markdown", "number"],
611
- # interactive=False,
612
- # wrap=True
613
- # )
614
-
615
- # gr.Markdown("## Correlated Features")
616
- # with gr.Row():
617
- # with gr.Column(scale=1):
618
- # gr.Markdown("### Top 5 Correlated Features")
619
- # top_correlated = gr.Dataframe(
620
- # headers=["Feature", "Cosine similarity"],
621
- # interactive=False
622
- # )
623
- # with gr.Column(scale=1):
624
- # gr.Markdown("### Bottom 5 Correlated Features")
625
- # bottom_correlated = gr.Dataframe(
626
- # headers=["Feature", "Cosine similarity"],
627
- # interactive=False
628
- # )
629
-
630
- # with gr.Row():
631
- # with gr.Column(scale=1):
632
- # gr.Markdown("## Top 5 Co-occurring Features")
633
- # co_occurring_features = gr.Dataframe(
634
- # headers=["Feature", "Co-occurrences"],
635
- # interactive=False
636
- # )
637
- # with gr.Column(scale=1):
638
- # gr.Markdown(f"## Activation Value Distribution")
639
- # activation_dist = gr.Plot()
640
 
641
  with gr.Tab("Feature Families"):
642
  gr.Markdown("# Feature Families")
@@ -652,11 +601,7 @@ def create_interface():
652
  datatype=["markdown", "number", "number"],
653
  label="Family and Child Features"
654
  )
655
- # family_dataframe = gr.Dataframe(
656
- # headers=["Feature", "F1 Score", "Pearson Correlation"],
657
- # datatype=["str", "number", "number"],
658
- # label="Family and Child Features"
659
- # )
660
 
661
  def search_feature_families(search_text, current_subject):
662
  family_analysis = subject_data[current_subject]['family_analysis']
@@ -665,74 +610,6 @@ def create_interface():
665
  matches = [family['superfeature'] for family in family_analysis if search_text.lower() in family['superfeature'].lower()]
666
  return gr.CheckboxGroup(choices=matches[:10]) # Limit to top 10 matches
667
 
668
- # def visualize_feature_family(selected_families, current_subject):
669
- # if not selected_families:
670
- # return "Please select a feature family to visualize.", None
671
-
672
- # selected_family = selected_families[0] # Take the first selected family
673
- # family_analysis = subject_data[current_subject]['family_analysis']
674
-
675
- # family_data = next((family for family in family_analysis if family['superfeature'] == selected_family), None)
676
- # if not family_data:
677
- # return "Invalid feature family selected.", None
678
-
679
- # output = f"# {family_data['superfeature']}\n\n"
680
- # output += f"## Super Reasoning\n{family_data['super_reasoning']}\n\n"
681
-
682
- # # Create DataFrame
683
- # df_data = [
684
- # {
685
- # "Feature": family_data['superfeature'],
686
- # "F1 Score": family_data['family_f1'],
687
- # "Pearson Correlation": family_data['family_pearson']
688
- # }
689
- # ]
690
-
691
- # for name, f1, pearson in zip(family_data['feature_names'], family_data['feature_f1'], family_data['feature_pearson']):
692
- # df_data.append({
693
- # "Feature": name,
694
- # "F1 Score": f1,
695
- # "Pearson Correlation": pearson
696
- # })
697
-
698
- # df = pd.DataFrame(df_data)
699
-
700
- # return output, df
701
-
702
- # def visualize_feature_family(selected_families, current_subject):
703
- # if not selected_families:
704
- # return "Please select a feature family to visualize.", None, "", []
705
-
706
- # selected_family = selected_families[0] # Take the first selected family
707
- # family_analysis = subject_data[current_subject]['family_analysis']
708
-
709
- # family_data = next((family for family in family_analysis if family['superfeature'] == selected_family), None)
710
- # if not family_data:
711
- # return "Invalid feature family selected.", None, "", []
712
-
713
- # output = f"# {family_data['superfeature']}\n\n"
714
- # output += f"## Super Reasoning\n{family_data['super_reasoning']}\n\n"
715
-
716
- # # Create DataFrame
717
- # df_data = [
718
- # {
719
- # "Feature": family_data['superfeature'],
720
- # "F1 Score": family_data['family_f1'],
721
- # "Pearson Correlation": family_data['family_pearson']
722
- # }
723
- # ]
724
-
725
- # for name, f1, pearson in zip(family_data['feature_names'], family_data['feature_f1'], family_data['feature_pearson']):
726
- # df_data.append({
727
- # "Feature": name,
728
- # "F1 Score": f1,
729
- # "Pearson Correlation": pearson
730
- # })
731
-
732
- # df = pd.DataFrame(df_data)
733
-
734
- # return output, df, "", [] # Return empty string for search box and empty list for checkbox
735
-
736
  def visualize_feature_family(selected_families, current_subject):
737
  if not selected_families:
738
  return "Please select a feature family to visualize.", None, "", []
@@ -753,11 +630,6 @@ def create_interface():
753
  "F1 Score": round(family_data['family_f1'], 2),
754
  "Pearson Correlation": round(family_data['family_pearson'], 4)
755
  },
756
- # {
757
- # "Feature": "## Child Features",
758
- # "F1 Score": None,
759
- # "Pearson Correlation": None
760
- # }
761
  ]
762
 
763
  for name, f1, pearson in zip(family_data['feature_names'], family_data['feature_f1'], family_data['feature_pearson']):
 
55
 
56
  # Load configuration and initialize OpenAI client
57
  download_all_files()
 
 
58
 
59
  # Load the API key from the environment variable
60
  api_key = os.getenv('openai_key')
 
98
  decoder = weights['decoder.weight'].cpu().numpy()
99
  del weights
100
 
 
 
 
 
101
  with open(family_analysis_path, 'r') as f:
102
  family_analysis = json.load(f)
103
 
 
527
  visualize_button = gr.Button("Visualize Feature")
528
 
529
  feature_info = gr.Markdown()
 
 
 
 
 
530
 
531
  abstracts_heading = gr.Markdown("## Top 5 Abstracts")
532
  top_abstracts = gr.Dataframe(
 
586
  inputs=[feature_matches, subject],
587
  outputs=[feature_info, top_abstracts, top_correlated, bottom_correlated, co_occurring_features, activation_dist, feature_search, feature_matches]
588
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
589
 
590
  with gr.Tab("Feature Families"):
591
  gr.Markdown("# Feature Families")
 
601
  datatype=["markdown", "number", "number"],
602
  label="Family and Child Features"
603
  )
604
+
 
 
 
 
605
 
606
  def search_feature_families(search_text, current_subject):
607
  family_analysis = subject_data[current_subject]['family_analysis']
 
610
  matches = [family['superfeature'] for family in family_analysis if search_text.lower() in family['superfeature'].lower()]
611
  return gr.CheckboxGroup(choices=matches[:10]) # Limit to top 10 matches
612
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
  def visualize_feature_family(selected_families, current_subject):
614
  if not selected_families:
615
  return "Please select a feature family to visualize.", None, "", []
 
630
  "F1 Score": round(family_data['family_f1'], 2),
631
  "Pearson Correlation": round(family_data['family_pearson'], 4)
632
  },
 
 
 
 
 
633
  ]
634
 
635
  for name, f1, pearson in zip(family_data['feature_names'], family_data['feature_f1'], family_data['feature_pearson']):