DrSyedFaizan committed
Commit 05948f9 · verified
1 Parent(s): be53738

Update app.py

Files changed (1)
  1. app.py +24 -41
app.py CHANGED
@@ -22,42 +22,25 @@ MODEL_PATHS = {
     "DistilBERT": "distilbert-base-uncased"
 }
 
-# Load Test Dataset (Example: Reddit Mental Health)
-test_texts = [
-    "I feel so anxious and panicked all the time.",
-    "I'm feeling absolutely wonderful today!",
-    "I don't think I can go on anymore, I feel suicidal.",
-    "Lately, I have mood swings that I can't explain.",
-    "I feel so stressed out about everything."
-]
-test_labels = [0, 3, 6, 1, 5]  # Anxiety, Normal, Suicidal, Bipolar, Stress
+# Load Reddit Mental Health Dataset
+def load_reddit_data(file_path):
+    df = pd.read_csv(file_path)
+    df = df.dropna(subset=["text", "label"])  # Ensure no missing values in relevant columns
+    return df
 
-# Define column structure for leaderboard
-@dataclass
-class ModelEvalColumn:
-    name: str
-    type: str
-    displayed_by_default: bool = True
-    never_hidden: bool = False
-    hidden: bool = False
+# Preprocess Dataset
+def preprocess_data(df, sample_size=100):
+    df_sample = df.sample(n=sample_size, random_state=42)  # Sample a subset
+    test_texts = df_sample["text"].tolist()
+    test_labels = df_sample["label"].tolist()
+    return test_texts, test_labels
 
-# Define the columns for your leaderboard
-fields = lambda cls: [
-    ModelEvalColumn(name="model", type="str", never_hidden=True),
-    ModelEvalColumn(name="model_type", type="str"),
-    ModelEvalColumn(name="precision", type="str"),
-    ModelEvalColumn(name="params", type="number"),
-    ModelEvalColumn(name="accuracy", type="number"),
-    ModelEvalColumn(name="f1_score", type="number"),
-    ModelEvalColumn(name="inference_time", type="number"),
-    ModelEvalColumn(name="license", type="str", displayed_by_default=False),
-]
-
-# Function to evaluate models and format for leaderboard
-def evaluate_models():
+# Function to evaluate models
+def evaluate_models(dataset_path):
+    df = load_reddit_data(dataset_path)
+    test_texts, test_labels = preprocess_data(df)
     results = []
 
-    # Model metadata (you would normally get this from model card or API)
     model_metadata = {
         "MindBERT": {"model_type": "BERT", "precision": "float16", "params": 0.11, "license": "MIT"},
         "BERT-base": {"model_type": "BERT", "precision": "float16", "params": 0.11, "license": "Apache-2.0"},
@@ -67,15 +50,12 @@ def evaluate_models():
 
     for model_name, model_path in MODEL_PATHS.items():
         print(f"Evaluating {model_name}...")
-        # Load Tokenizer and Model
         tokenizer = AutoTokenizer.from_pretrained(model_path)
         model = AutoModelForSequenceClassification.from_pretrained(model_path)
         model.eval()
 
-        # Tokenize Test Data
         inputs = tokenizer(test_texts, padding=True, truncation=True, return_tensors="pt")
 
-        # Measure Inference Time
         start_time = time.time()
         with torch.no_grad():
             outputs = model(**inputs)
@@ -83,17 +63,15 @@ def evaluate_models():
         predictions = torch.argmax(logits, dim=1).numpy()
         end_time = time.time()
 
-        # Compute Metrics
         accuracy = accuracy_score(test_labels, predictions)
         f1_score = f1_metric.compute(predictions=predictions, references=test_labels, average="macro")["f1"]
         inference_time = round(end_time - start_time, 4)
 
-        # Store Results with additional metadata needed for leaderboard
         result = {
             "model": model_name,
             "model_type": model_metadata[model_name]["model_type"],
             "precision": model_metadata[model_name]["precision"],
-            "params": model_metadata[model_name]["params"],
+            "params": model_metadata[model_name]["params"],
             "accuracy": round(accuracy, 4),
             "f1_score": round(f1_score, 4),
             "inference_time": inference_time,
@@ -101,9 +79,14 @@ def evaluate_models():
         }
         results.append(result)
 
-    # Convert to DataFrame
-    df_results = pd.DataFrame(results)
-    return df_results
+    return pd.DataFrame(results)
+
+# Load and evaluate
+DATASET_PATH = "path/to/reddit_mental_health.csv"
+df_results = evaluate_models(DATASET_PATH)
+
+# Display results
+df_results
 
 # Initialize leaderboard with custom columns
 def init_leaderboard(dataframe):
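
Note: the new module-level call reads a CSV at DATASET_PATH (the path in the diff is a placeholder) and assumes "text" and "label" columns holding integer class ids. A minimal, self-contained sketch of that data flow, using a hypothetical demo file and rows rather than the real dataset:

import pandas as pd

# Build a tiny stand-in CSV with the two columns the updated code relies on.
# File name and rows are illustrative only, not part of the commit.
demo = pd.DataFrame({
    "text": [
        "I feel so anxious all the time.",
        "I'm feeling wonderful today!",
        "I feel so stressed out about everything.",
    ],
    "label": [0, 3, 5],
})
demo.to_csv("reddit_mental_health_demo.csv", index=False)

# Mirror load_reddit_data() and preprocess_data() from the diff,
# capping the sample size so a small file still works.
df = pd.read_csv("reddit_mental_health_demo.csv").dropna(subset=["text", "label"])
sample = df.sample(n=min(100, len(df)), random_state=42)
test_texts = sample["text"].tolist()
test_labels = sample["label"].tolist()
print(test_texts)
print(test_labels)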