Spaces:

codemogul
/

tabibu-mh

Sleeping

App Files Files Community

deeksonparlma commited on Feb 20, 2023

Commit

15e0607

1 Parent(s): 1643835

update on model

Browse files

Files changed (8) hide show

.ipynb_checkpoints/model-checkpoint.ipynb +236 -16
.~lock.mental_health_bot.xlsx# +1 -0
excel-data.xls +0 -0
mental_health_bot.csv +0 -0
mental_health_bot.ods +0 -0
mental_health_bot.xlsx +0 -0
model.ipynb +196 -17
requirements.txt +3 -1

.ipynb_checkpoints/model-checkpoint.ipynb CHANGED Viewed

@@ -2,17 +2,90 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
    "id": "ace57031",
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Accuracy: 0.0\n",
-      "Prediction: ['Symptoms of depression include sadness, lack of energy, and loss of interest in activities.']\n"
-     ]
     }
    ],
    "source": [
@@ -20,14 +93,116 @@
     "from sklearn.model_selection import train_test_split\n",
     "from sklearn.linear_model import LogisticRegression\n",
     "from sklearn.metrics import accuracy_score\n",
-    "\n",
     "# Step 1: Collect and preprocess data\n",
-    "questions = [\"What are some symptoms of depression?\",\n",
-    "             \"How can I manage my anxiety?\",\n",
-    "             \"What are the treatments for bipolar disorder?\"]\n",
-    "responses = [\"Symptoms of depression include sadness, lack of energy, and loss of interest in activities.\",\n",
-    "             \"You can manage your anxiety through techniques such as deep breathing, meditation, and therapy.\",\n",
-    "             \"Treatments for bipolar disorder include medication, therapy, and lifestyle changes.\"]\n",
     "\n",
     "vectorizer = TfidfVectorizer()\n",
     "X = vectorizer.fit_transform(questions)\n",
@@ -42,16 +217,56 @@
     "# Step 4: Train the model\n",
     "model.fit(X_train, y_train)\n",
     "\n",
     "# Step 5: Evaluate the model\n",
     "y_pred = model.predict(X_test)\n",
     "accuracy = accuracy_score(y_test, y_pred)\n",
     "print(\"Accuracy:\", accuracy)\n",
     "\n",
     "# Step 6: Use the model to make predictions\n",
-    "new_question = \"What are the symptoms of anxiety?\"\n",
     "new_question_vector = vectorizer.transform([new_question])\n",
     "prediction = model.predict(new_question_vector)\n",
-    "print(\"Prediction:\", prediction)\n"
    ]
   }
  ],
@@ -72,6 +287,11 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.10.7"
   }
  },
  "nbformat": 4,

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 8,
    "id": "ace57031",
    "metadata": {},
    "outputs": [
     {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Question_ID</th>\n",
+       "      <th>Questions</th>\n",
+       "      <th>Answers</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1590140</td>\n",
+       "      <td>What does it mean to have a mental illness?</td>\n",
+       "      <td>Mental illnesses are health conditions that di...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2110618</td>\n",
+       "      <td>Who does mental illness affect?</td>\n",
+       "      <td>It is estimated that mental illness affects 1 ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>6361820</td>\n",
+       "      <td>What causes mental illness?</td>\n",
+       "      <td>It is estimated that mental illness affects 1 ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>9434130</td>\n",
+       "      <td>What are some of the warning signs of mental i...</td>\n",
+       "      <td>Symptoms of mental health disorders vary depen...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>7657263</td>\n",
+       "      <td>Can people with mental illness recover?</td>\n",
+       "      <td>When healing from mental illness, early identi...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  Question_ID                                          Questions  \\\n",
+       "0     1590140        What does it mean to have a mental illness?   \n",
+       "1     2110618                    Who does mental illness affect?   \n",
+       "2     6361820                        What causes mental illness?   \n",
+       "3     9434130  What are some of the warning signs of mental i...   \n",
+       "4     7657263            Can people with mental illness recover?   \n",
+       "\n",
+       "                                             Answers  \n",
+       "0  Mental illnesses are health conditions that di...  \n",
+       "1  It is estimated that mental illness affects 1 ...  \n",
+       "2  It is estimated that mental illness affects 1 ...  \n",
+       "3  Symptoms of mental health disorders vary depen...  \n",
+       "4  When healing from mental illness, early identi...  "
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
     "from sklearn.model_selection import train_test_split\n",
     "from sklearn.linear_model import LogisticRegression\n",
     "from sklearn.metrics import accuracy_score\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
+    "from huggingface_hub import notebook_login\n",
+    "# notebook_login()\n",
     "# Step 1: Collect and preprocess data\n",
+    "# Get all the questions from Questions column and responses from Questions column in the dataset data.csv\n",
+    "# questions = data[\"Questions\"].tolist()\n",
+    "# responses = data[\"Responses\"].tolist()\n",
+    "questions = []\n",
+    "responses = []\n",
+    "q_id = []\n",
+    "with open(\"mental_health_bot.csv\", \"r\") as f:\n",
+    "    for line in f:\n",
+    "        \n",
+    "        array = line.split(\",\") \n",
+    "        # questions.append(question)\n",
+    "        # responses.append(response)\n",
+    "        # q_id.append(question_id)\n",
+    "        try:\n",
+    "            question = array[1]\n",
+    "            response = array[2]\n",
+    "            question_id = array[0]\n",
+    "            questions.append(question)\n",
+    "            responses.append(response)\n",
+    "            q_id.append(question_id)\n",
+    "        except:\n",
+    "            pass\n",
+    "\n",
+    "data = pd.read_csv(\"data.csv\")\n",
+    "data.head()\n",
+    "        \n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "60e154b4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "missing values: Question_ID    0\n",
+      "Questions      0\n",
+      "Answers        0\n",
+      "dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "print('missing values:', data.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "41311468",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 149 entries, 0 to 148\n",
+      "Data columns (total 3 columns):\n",
+      " #   Column       Non-Null Count  Dtype \n",
+      "---  ------       --------------  ----- \n",
+      " 0   Question_ID  149 non-null    object\n",
+      " 1   Questions    149 non-null    object\n",
+      " 2   Answers      149 non-null    object\n",
+      "dtypes: object(3)\n",
+      "memory usage: 3.6+ KB\n",
+      "None\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(data.info())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "f6719ffa",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 0.03333333333333333\n"
+     ]
+    }
+   ],
+   "source": [
+    "# print(questions)\n",
+    "# print(responses)\n",
+    "\n",
+    "\n",
+    "# questions = [\"What are some symptoms of depression?\",\n",
+    "#              \"How can I manage my anxiety?\",\n",
+    "#              \"What are the treatments for bipolar disorder?\"]\n",
+    "# responses = [\"Symptoms of depression include sadness, lack of energy, and loss of interest in activities.\",\n",
+    "#              \"You can manage your anxiety through techniques such as deep breathing, meditation, and therapy.\",\n",
+    "#              \"Treatments for bipolar disorder include medication, therapy, and lifestyle changes.\"]\n",
     "\n",
     "vectorizer = TfidfVectorizer()\n",
     "X = vectorizer.fit_transform(questions)\n",
     "# Step 4: Train the model\n",
     "model.fit(X_train, y_train)\n",
     "\n",
+    "# model.push_to_hub(\"tabibu-ai/mental-health-chatbot\")\n",
+    "# pt_model = DistilBertForSequenceClassification.from_pretrained(\"model.ipynb\", from_tf=True)\n",
+    "# pt_model.save_pretrained(\"model.ipynb\")\n",
+    "# load model from hub\n",
+    "\n",
     "# Step 5: Evaluate the model\n",
     "y_pred = model.predict(X_test)\n",
     "accuracy = accuracy_score(y_test, y_pred)\n",
     "print(\"Accuracy:\", accuracy)\n",
     "\n",
     "# Step 6: Use the model to make predictions\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "d8d18524",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Ask me anythingWho are you\n"
+     ]
+    }
+   ],
+   "source": [
+    "new_question = input(\"Ask me anything : \")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "e51d4ca5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Prediction: ['\"It is estimated that mental illness affects 1 in 5 adults in America']\n"
+     ]
+    }
+   ],
+   "source": [
     "new_question_vector = vectorizer.transform([new_question])\n",
     "prediction = model.predict(new_question_vector)\n",
+    "print(\"Prediction:\", prediction)"
    ]
   }
  ],
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.10.7"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+   }
   }
  },
  "nbformat": 4,

.~lock.mental_health_bot.xlsx# ADDED Viewed

	@@ -0,0 +1 @@


1	+ ,dickson,dickson,20.02.2023 22:13,file:///home/dickson/.config/libreoffice/4;

excel-data.xls ADDED Viewed

The diff for this file is too large to render. See raw diff

mental_health_bot.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

mental_health_bot.ods ADDED Viewed

Binary file (85.9 kB). View file

mental_health_bot.xlsx ADDED Viewed

Binary file (55.6 kB). View file

model.ipynb CHANGED Viewed

@@ -2,17 +2,90 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 15,
    "id": "ace57031",
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Accuracy: 0.023255813953488372\n",
-      "Prediction: [' is a member of the BC Partners for Mental Health and Addictions Information. The institute is dedicated to the study of substance use in support of community-wide efforts aimed at providing all people with access to healthier lives']\n"
-     ]
     }
    ],
    "source": [
@@ -20,7 +93,12 @@
     "from sklearn.model_selection import train_test_split\n",
     "from sklearn.linear_model import LogisticRegression\n",
     "from sklearn.metrics import accuracy_score\n",
-    "\n",
     "# Step 1: Collect and preprocess data\n",
     "# Get all the questions from Questions column and responses from Questions column in the dataset data.csv\n",
     "# questions = data[\"Questions\"].tolist()\n",
@@ -28,7 +106,7 @@
     "questions = []\n",
     "responses = []\n",
     "q_id = []\n",
-    "with open(\"data.csv\", \"r\") as f:\n",
     "    for line in f:\n",
     "        \n",
     "        array = line.split(\",\") \n",
@@ -45,9 +123,74 @@
     "        except:\n",
     "            pass\n",
     "\n",
-    "\n",
-    "        \n",
-    "\n",
     "# print(questions)\n",
     "# print(responses)\n",
     "\n",
@@ -72,8 +215,9 @@
     "# Step 4: Train the model\n",
     "model.fit(X_train, y_train)\n",
     "\n",
-    "model.push_to_hub(\"tabibu-ai/mental-health-chatbot\")\n",
-    "\n",
     "# load model from hub\n",
     "\n",
     "# Step 5: Evaluate the model\n",
@@ -82,16 +226,51 @@
     "print(\"Accuracy:\", accuracy)\n",
     "\n",
     "# Step 6: Use the model to make predictions\n",
-    "new_question = \"I feel sad\"\n",
     "new_question_vector = vectorizer.transform([new_question])\n",
     "prediction = model.predict(new_question_vector)\n",
-    "print(\"Prediction:\", prediction)\n"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 8,
    "id": "ace57031",
    "metadata": {},
    "outputs": [
     {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Question_ID</th>\n",
+       "      <th>Questions</th>\n",
+       "      <th>Answers</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1590140</td>\n",
+       "      <td>What does it mean to have a mental illness?</td>\n",
+       "      <td>Mental illnesses are health conditions that di...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2110618</td>\n",
+       "      <td>Who does mental illness affect?</td>\n",
+       "      <td>It is estimated that mental illness affects 1 ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>6361820</td>\n",
+       "      <td>What causes mental illness?</td>\n",
+       "      <td>It is estimated that mental illness affects 1 ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>9434130</td>\n",
+       "      <td>What are some of the warning signs of mental i...</td>\n",
+       "      <td>Symptoms of mental health disorders vary depen...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>7657263</td>\n",
+       "      <td>Can people with mental illness recover?</td>\n",
+       "      <td>When healing from mental illness, early identi...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  Question_ID                                          Questions  \\\n",
+       "0     1590140        What does it mean to have a mental illness?   \n",
+       "1     2110618                    Who does mental illness affect?   \n",
+       "2     6361820                        What causes mental illness?   \n",
+       "3     9434130  What are some of the warning signs of mental i...   \n",
+       "4     7657263            Can people with mental illness recover?   \n",
+       "\n",
+       "                                             Answers  \n",
+       "0  Mental illnesses are health conditions that di...  \n",
+       "1  It is estimated that mental illness affects 1 ...  \n",
+       "2  It is estimated that mental illness affects 1 ...  \n",
+       "3  Symptoms of mental health disorders vary depen...  \n",
+       "4  When healing from mental illness, early identi...  "
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
     "from sklearn.model_selection import train_test_split\n",
     "from sklearn.linear_model import LogisticRegression\n",
     "from sklearn.metrics import accuracy_score\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
+    "from huggingface_hub import notebook_login\n",
+    "# notebook_login()\n",
     "# Step 1: Collect and preprocess data\n",
     "# Get all the questions from Questions column and responses from Questions column in the dataset data.csv\n",
     "# questions = data[\"Questions\"].tolist()\n",
     "questions = []\n",
     "responses = []\n",
     "q_id = []\n",
+    "with open(\"mental_health_bot.csv\", \"r\") as f:\n",
     "    for line in f:\n",
     "        \n",
     "        array = line.split(\",\") \n",
     "        except:\n",
     "            pass\n",
     "\n",
+    "data = pd.read_csv(\"data.csv\")\n",
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "8f51e39d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "missing values: Question_ID    0\n",
+      "Questions      0\n",
+      "Answers        0\n",
+      "dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "print('missing values:', data.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "1d697a39",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 149 entries, 0 to 148\n",
+      "Data columns (total 3 columns):\n",
+      " #   Column       Non-Null Count  Dtype \n",
+      "---  ------       --------------  ----- \n",
+      " 0   Question_ID  149 non-null    object\n",
+      " 1   Questions    149 non-null    object\n",
+      " 2   Answers      149 non-null    object\n",
+      "dtypes: object(3)\n",
+      "memory usage: 3.6+ KB\n",
+      "None\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(data.info())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "c5dde0e4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 0.03333333333333333\n"
+     ]
+    }
+   ],
+   "source": [
     "# print(questions)\n",
     "# print(responses)\n",
     "\n",
     "# Step 4: Train the model\n",
     "model.fit(X_train, y_train)\n",
     "\n",
+    "# model.push_to_hub(\"tabibu-ai/mental-health-chatbot\")\n",
+    "pt_model = DistilBertForSequenceClassification.from_pretrained(\"model.ipynb\", from_tf=True)\n",
+    "pt_model.save_pretrained(\"model.ipynb\")\n",
     "# load model from hub\n",
     "\n",
     "# Step 5: Evaluate the model\n",
     "print(\"Accuracy:\", accuracy)\n",
     "\n",
     "# Step 6: Use the model to make predictions\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "14406312",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Ask me anything : I feel sad\n"
+     ]
+    }
+   ],
+   "source": [
+    "new_question = input(\"Ask me anything : \")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "6b9198db",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Prediction: ['\"It is estimated that mental illness affects 1 in 5 adults in America']\n"
+     ]
+    }
+   ],
+   "source": [
     "new_question_vector = vectorizer.transform([new_question])\n",
     "prediction = model.predict(new_question_vector)\n",
+    "print(\"Prediction:\", prediction)"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },

requirements.txt CHANGED Viewed

@@ -1,2 +1,4 @@
 torch
-transformers

 torch
+transformers
+huggingface_hub
+tensorflow