Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -4,11 +4,12 @@ import numpy as np
|
|
4 |
import pandas as pd
|
5 |
import evaluate
|
6 |
import gradio as gr
|
|
|
7 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
8 |
-
from sklearn.metrics import accuracy_score
|
9 |
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
|
10 |
-
from dataclasses import dataclass
|
11 |
-
from typing import List
|
12 |
|
13 |
# Load Accuracy and F1-Score Metrics
|
14 |
accuracy_metric = evaluate.load("accuracy")
|
@@ -22,23 +23,37 @@ MODEL_PATHS = {
|
|
22 |
"DistilBERT": "distilbert-base-uncased"
|
23 |
}
|
24 |
|
25 |
-
#
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
-
#
|
32 |
-
def
|
|
|
|
|
|
|
|
|
33 |
df_sample = df.sample(n=sample_size, random_state=42) # Sample a subset
|
34 |
test_texts = df_sample["text"].tolist()
|
35 |
-
test_labels = df_sample["
|
36 |
return test_texts, test_labels
|
37 |
|
38 |
# Function to evaluate models
|
39 |
def evaluate_models(dataset_path):
|
40 |
-
|
41 |
-
test_texts, test_labels = preprocess_data(df)
|
42 |
results = []
|
43 |
|
44 |
model_metadata = {
|
@@ -82,12 +97,13 @@ def evaluate_models(dataset_path):
|
|
82 |
return pd.DataFrame(results)
|
83 |
|
84 |
# Load and evaluate
|
85 |
-
DATASET_PATH = "
|
86 |
df_results = evaluate_models(DATASET_PATH)
|
87 |
|
88 |
# Display results
|
89 |
df_results
|
90 |
|
|
|
91 |
# Initialize leaderboard with custom columns
|
92 |
def init_leaderboard(dataframe):
|
93 |
if dataframe is None or dataframe.empty:
|
|
|
4 |
import pandas as pd
|
5 |
import evaluate
|
6 |
import gradio as gr
|
7 |
+
import re
|
8 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
9 |
+
from sklearn.metrics import accuracy_score
|
10 |
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
|
11 |
+
from dataclasses import dataclass
|
12 |
+
from typing import List
|
13 |
|
14 |
# Load Accuracy and F1-Score Metrics
|
15 |
accuracy_metric = evaluate.load("accuracy")
|
|
|
23 |
"DistilBERT": "distilbert-base-uncased"
|
24 |
}
|
25 |
|
26 |
+
# Lookup table from integer key (0-4) to a human-readable condition name.
# NOTE(review): presumably the keys correspond to the dataset's "target"
# values - confirm against the CSV before relying on it.
LABEL_MAPPING = dict(
    enumerate(
        (
            "Stress",
            "Depression",
            "Bipolar disorder",
            "Personality disorder",
            "Anxiety",
        )
    )
)
|
34 |
+
|
35 |
+
# Function to clean text using regular expressions
|
36 |
+
def clean_text(text):
    """Normalize a raw text string.

    Steps, in order:
      1. lower-case the text;
      2. remove URLs (any non-whitespace run starting with ``http``);
      3. delete every character that is not an ASCII letter, a digit or
         whitespace;
      4. collapse each whitespace run to a single space and trim the ends.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; empty if nothing survives the filtering.
    """
    text = text.lower()
    text = re.sub(r"http\S+", "", text)  # Remove URLs
    # Whitespace is kept here (\s) so words separated only by a newline or
    # tab are not glued together when symbols are stripped.
    text = re.sub(r"[^a-z0-9\s]", "", text)  # Remove special characters
    # Collapse whitespace last: deleting a stand-alone symbol such as the
    # "&" in "a & b" would otherwise leave two adjacent spaces behind.
    text = re.sub(r"\s+", " ", text)
    return text.strip()
|
42 |
|
43 |
+
# Load and preprocess Reddit Mental Health Dataset
|
44 |
+
def load_reddit_data(file_path, sample_size=100):
    """Load a CSV and return a cleaned random sample of texts and labels.

    Args:
        file_path: Location passed straight to ``pandas.read_csv``. The
            file must provide ``text`` and ``target`` columns.
        sample_size: Maximum number of rows to sample. If fewer rows remain
            after dropping incomplete ones, all of them are returned.

    Returns:
        A ``(test_texts, test_labels)`` tuple of equally long lists: the
        sampled ``text`` values after ``clean_text`` and the matching
        ``target`` values.

    Raises:
        KeyError: If the ``text`` or ``target`` column is missing.
    """
    df = pd.read_csv(file_path)
    df = df.dropna(subset=["text", "target"])  # Rows need both text and label

    # Only "text" and "target" are read below, so no other column is dropped;
    # dropping by position or by a hard-coded name could fail on a CSV with a
    # different layout.

    # Cap the request so a small dataset yields a smaller sample instead of
    # a ValueError from DataFrame.sample.
    n_rows = min(sample_size, len(df))
    df_sample = df.sample(n=n_rows, random_state=42)  # Fixed seed: reproducible

    # Clean only the sampled rows; the rows picked do not depend on the
    # text content, so the result is the same as cleaning everything first.
    test_texts = df_sample["text"].apply(clean_text).tolist()
    test_labels = df_sample["target"].tolist()
    return test_texts, test_labels
|
53 |
|
54 |
# Function to evaluate models
|
55 |
def evaluate_models(dataset_path):
|
56 |
+
test_texts, test_labels = load_reddit_data(dataset_path)
|
|
|
57 |
results = []
|
58 |
|
59 |
model_metadata = {
|
|
|
97 |
return pd.DataFrame(results)
|
98 |
|
99 |
# Load and evaluate
|
100 |
+
DATASET_PATH = "https://huggingface.co/spaces/DrSyedFaizan/mindBERTevaluation/resolve/main/rmhd.csv"
|
101 |
df_results = evaluate_models(DATASET_PATH)
|
102 |
|
103 |
# Display results
|
104 |
df_results
|
105 |
|
106 |
+
|
107 |
# Initialize leaderboard with custom columns
|
108 |
def init_leaderboard(dataframe):
|
109 |
if dataframe is None or dataframe.empty:
|