Spaces:

charlieoneill
/

saerch.ai

Running

App Files Community

charlieoneill committed on Jul 31

Commit

3187d23

•

1 Parent(s): 5df6c06

yep

Browse files

Files changed (2) hide show

.gitignore +1 -0
app.py +1 -129

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ data/

app.py CHANGED Viewed

@@ -55,8 +55,6 @@ def download_all_files():
 # Load configuration and initialize OpenAI client
 download_all_files()
-# config = yaml.safe_load(open('../config.yaml', 'r'))
-# client = OpenAI(api_key=config['jwu_openai_key'])
 # Load the API key from the environment variable
 api_key = os.getenv('openai_key')
@@ -100,10 +98,6 @@ def load_subject_data(subject):
  decoder = weights['decoder.weight'].cpu().numpy()
  del weights
- # # Load feature families
- # with open(families_path, 'r') as f:
- # feature_families = json.load(f)
  with open(family_analysis_path, 'r') as f:
  family_analysis = json.load(f)
@@ -533,11 +527,6 @@ def create_interface():
  visualize_button = gr.Button("Visualize Feature")
  feature_info = gr.Markdown()
- # abstracts_heading = gr.Markdown("## Top 5 Abstracts")
- # top_abstracts = gr.Dataframe(
- # headers=["Title", "Activation value"],
- # interactive=False
- # )
  abstracts_heading = gr.Markdown("## Top 5 Abstracts")
  top_abstracts = gr.Dataframe(
@@ -597,46 +586,6 @@ def create_interface():
  inputs=[feature_matches, subject],
  outputs=[feature_info, top_abstracts, top_correlated, bottom_correlated, co_occurring_features, activation_dist, feature_search, feature_matches]
  )
- # with gr.Row():
- # feature_search = gr.Textbox(label="Search Feature Labels")
- # feature_matches = gr.CheckboxGroup(label="Matching Features", choices=[])
- # visualize_button = gr.Button("Visualize Feature")
- # feature_info = gr.Markdown()
- # abstracts_heading = gr.Markdown("## Top 5 Abstracts")
- # top_abstracts = gr.Dataframe(
- # headers=["Title", "Activation value"],
- # datatype=["markdown", "number"],
- # interactive=False,
- # wrap=True
- # )
- # gr.Markdown("## Correlated Features")
- # with gr.Row():
- # with gr.Column(scale=1):
- # gr.Markdown("### Top 5 Correlated Features")
- # top_correlated = gr.Dataframe(
- # headers=["Feature", "Cosine similarity"],
- # interactive=False
- # )
- # with gr.Column(scale=1):
- # gr.Markdown("### Bottom 5 Correlated Features")
- # bottom_correlated = gr.Dataframe(
- # headers=["Feature", "Cosine similarity"],
- # interactive=False
- # )
- # with gr.Row():
- # with gr.Column(scale=1):
- # gr.Markdown("## Top 5 Co-occurring Features")
- # co_occurring_features = gr.Dataframe(
- # headers=["Feature", "Co-occurrences"],
- # interactive=False
- # )
- # with gr.Column(scale=1):
- # gr.Markdown(f"## Activation Value Distribution")
- # activation_dist = gr.Plot()
  with gr.Tab("Feature Families"):
  gr.Markdown("# Feature Families")
@@ -652,11 +601,7 @@ def create_interface():
  datatype=["markdown", "number", "number"],
  label="Family and Child Features"
  )
- # family_dataframe = gr.Dataframe(
- # headers=["Feature", "F1 Score", "Pearson Correlation"],
- # datatype=["str", "number", "number"],
- # label="Family and Child Features"
- # )
  def search_feature_families(search_text, current_subject):
  family_analysis = subject_data[current_subject]['family_analysis']
@@ -665,74 +610,6 @@ def create_interface():
  matches = [family['superfeature'] for family in family_analysis if search_text.lower() in family['superfeature'].lower()]
  return gr.CheckboxGroup(choices=matches[:10]) # Limit to top 10 matches
- # def visualize_feature_family(selected_families, current_subject):
- # if not selected_families:
- # return "Please select a feature family to visualize.", None
- # selected_family = selected_families[0] # Take the first selected family
- # family_analysis = subject_data[current_subject]['family_analysis']
- # family_data = next((family for family in family_analysis if family['superfeature'] == selected_family), None)
- # if not family_data:
- # return "Invalid feature family selected.", None
- # output = f"# {family_data['superfeature']}\n\n"
- # output += f"## Super Reasoning\n{family_data['super_reasoning']}\n\n"
- # # Create DataFrame
- # df_data = [
- # {
- # "Feature": family_data['superfeature'],
- # "F1 Score": family_data['family_f1'],
- # "Pearson Correlation": family_data['family_pearson']
- # }
- # ]
- # for name, f1, pearson in zip(family_data['feature_names'], family_data['feature_f1'], family_data['feature_pearson']):
- # df_data.append({
- # "Feature": name,
- # "F1 Score": f1,
- # "Pearson Correlation": pearson
- # })
- # df = pd.DataFrame(df_data)
- # return output, df
- # def visualize_feature_family(selected_families, current_subject):
- # if not selected_families:
- # return "Please select a feature family to visualize.", None, "", []
- # selected_family = selected_families[0] # Take the first selected family
- # family_analysis = subject_data[current_subject]['family_analysis']
- # family_data = next((family for family in family_analysis if family['superfeature'] == selected_family), None)
- # if not family_data:
- # return "Invalid feature family selected.", None, "", []
- # output = f"# {family_data['superfeature']}\n\n"
- # output += f"## Super Reasoning\n{family_data['super_reasoning']}\n\n"
- # # Create DataFrame
- # df_data = [
- # {
- # "Feature": family_data['superfeature'],
- # "F1 Score": family_data['family_f1'],
- # "Pearson Correlation": family_data['family_pearson']
- # }
- # ]
- # for name, f1, pearson in zip(family_data['feature_names'], family_data['feature_f1'], family_data['feature_pearson']):
- # df_data.append({
- # "Feature": name,
- # "F1 Score": f1,
- # "Pearson Correlation": pearson
- # })
- # df = pd.DataFrame(df_data)
- # return output, df, "", [] # Return empty string for search box and empty list for checkbox
  def visualize_feature_family(selected_families, current_subject):
  if not selected_families:
  return "Please select a feature family to visualize.", None, "", []
@@ -753,11 +630,6 @@ def create_interface():
  "F1 Score": round(family_data['family_f1'], 2),
  "Pearson Correlation": round(family_data['family_pearson'], 4)
  },
- # {
- # "Feature": "## Child Features",
- # "F1 Score": None,
- # "Pearson Correlation": None
- # }
  ]
  for name, f1, pearson in zip(family_data['feature_names'], family_data['feature_f1'], family_data['feature_pearson']):

 # Load configuration and initialize OpenAI client
 download_all_files()
 # Load the API key from the environment variable
 api_key = os.getenv('openai_key')
  decoder = weights['decoder.weight'].cpu().numpy()
  del weights
  with open(family_analysis_path, 'r') as f:
  family_analysis = json.load(f)
  visualize_button = gr.Button("Visualize Feature")
  feature_info = gr.Markdown()
  abstracts_heading = gr.Markdown("## Top 5 Abstracts")
  top_abstracts = gr.Dataframe(
  inputs=[feature_matches, subject],
  outputs=[feature_info, top_abstracts, top_correlated, bottom_correlated, co_occurring_features, activation_dist, feature_search, feature_matches]
  )
  with gr.Tab("Feature Families"):
  gr.Markdown("# Feature Families")
  datatype=["markdown", "number", "number"],
  label="Family and Child Features"
  )
  def search_feature_families(search_text, current_subject):
  family_analysis = subject_data[current_subject]['family_analysis']
  matches = [family['superfeature'] for family in family_analysis if search_text.lower() in family['superfeature'].lower()]
  return gr.CheckboxGroup(choices=matches[:10]) # Limit to top 10 matches
  def visualize_feature_family(selected_families, current_subject):
  if not selected_families:
  return "Please select a feature family to visualize.", None, "", []
  "F1 Score": round(family_data['family_f1'], 2),
  "Pearson Correlation": round(family_data['family_pearson'], 4)
  },
  ]
  for name, f1, pearson in zip(family_data['feature_names'], family_data['feature_f1'], family_data['feature_pearson']):