DrSyedFaizan committed on
Commit
72cd162
·
verified ·
1 Parent(s): f155b04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -14
app.py CHANGED
@@ -4,11 +4,12 @@ import numpy as np
4
  import pandas as pd
5
  import evaluate
6
  import gradio as gr
 
7
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
8
- from sklearn.metrics import accuracy_score, classification_report
9
  from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
10
- from dataclasses import dataclass, field
11
- from typing import List, Optional
12
 
13
  # Load Accuracy and F1-Score Metrics
14
  accuracy_metric = evaluate.load("accuracy")
@@ -22,23 +23,37 @@ MODEL_PATHS = {
22
  "DistilBERT": "distilbert-base-uncased"
23
  }
24
 
25
- # Load Reddit Mental Health Dataset
26
- def load_reddit_data(file_path):
27
- df = pd.read_csv(file_path)
28
- df = df.dropna(subset=["text", "label"]) # Ensure no missing values in relevant columns
29
- return df
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- # Preprocess Dataset
32
- def preprocess_data(df, sample_size=100):
 
 
 
 
33
  df_sample = df.sample(n=sample_size, random_state=42) # Sample a subset
34
  test_texts = df_sample["text"].tolist()
35
- test_labels = df_sample["label"].tolist()
36
  return test_texts, test_labels
37
 
38
  # Function to evaluate models
39
  def evaluate_models(dataset_path):
40
- df = load_reddit_data(dataset_path)
41
- test_texts, test_labels = preprocess_data(df)
42
  results = []
43
 
44
  model_metadata = {
@@ -82,12 +97,13 @@ def evaluate_models(dataset_path):
82
  return pd.DataFrame(results)
83
 
84
  # Load and evaluate
85
- DATASET_PATH = "path/to/reddit_mental_health.csv"
86
  df_results = evaluate_models(DATASET_PATH)
87
 
88
  # Display results
89
  df_results
90
 
 
91
  # Initialize leaderboard with custom columns
92
  def init_leaderboard(dataframe):
93
  if dataframe is None or dataframe.empty:
 
4
  import pandas as pd
5
  import evaluate
6
  import gradio as gr
7
+ import re
8
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
9
+ from sklearn.metrics import accuracy_score
10
  from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
11
+ from dataclasses import dataclass
12
+ from typing import List
13
 
14
  # Load Accuracy and F1-Score Metrics
15
  accuracy_metric = evaluate.load("accuracy")
 
23
  "DistilBERT": "distilbert-base-uncased"
24
  }
25
 
26
+ # Label Mapping
27
+ LABEL_MAPPING = {
28
+ 0: "Stress",
29
+ 1: "Depression",
30
+ 2: "Bipolar disorder",
31
+ 3: "Personality disorder",
32
+ 4: "Anxiety"
33
+ }
34
+
35
+ # Function to clean text using regular expressions
36
+ def clean_text(text):
37
+ text = text.lower()
38
+ text = re.sub(r'http\S+', '', text) # Remove URLs
39
+ text = re.sub(r'\s+', ' ', text) # Remove excessive whitespace
40
+ text = re.sub(r'[^a-zA-Z0-9 ]', '', text) # Remove special characters
41
+ return text.strip()
42
 
43
+ # Load and preprocess Reddit Mental Health Dataset
44
+ def load_reddit_data(file_path, sample_size=100):
45
+ df = pd.read_csv(file_path)
46
+ df = df.dropna(subset=["text", "target"]) # Ensure no missing values in relevant columns
47
+ df = df.drop(columns=[df.columns[0], "title"]) # Drop index and title columns
48
+ df["text"] = df["text"].apply(clean_text) # Clean text column
49
  df_sample = df.sample(n=sample_size, random_state=42) # Sample a subset
50
  test_texts = df_sample["text"].tolist()
51
+ test_labels = df_sample["target"].tolist()
52
  return test_texts, test_labels
53
 
54
  # Function to evaluate models
55
  def evaluate_models(dataset_path):
56
+ test_texts, test_labels = load_reddit_data(dataset_path)
 
57
  results = []
58
 
59
  model_metadata = {
 
97
  return pd.DataFrame(results)
98
 
99
  # Load and evaluate
100
+ DATASET_PATH = "https://huggingface.co/spaces/DrSyedFaizan/mindBERTevaluation/resolve/main/rmhd.csv"
101
  df_results = evaluate_models(DATASET_PATH)
102
 
103
  # Display results
104
  df_results
105
 
106
+
107
  # Initialize leaderboard with custom columns
108
  def init_leaderboard(dataframe):
109
  if dataframe is None or dataframe.empty: