deeksonparlma committed on
Commit
1a88edc
·
1 Parent(s): 15e0607

migrate model to pickle

Browse files
Files changed (4) hide show
  1. .~lock.mental_health_bot.xlsx# +0 -1
  2. model.ipynb +32 -32
  3. model.pkl +3 -0
  4. vectorizer.pkl +3 -0
.~lock.mental_health_bot.xlsx# DELETED
@@ -1 +0,0 @@
1
- ,dickson,dickson,20.02.2023 22:13,file:///home/dickson/.config/libreoffice/4;
 
 
model.ipynb CHANGED
@@ -2,10 +2,19 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 8,
6
  "id": "ace57031",
7
  "metadata": {},
8
  "outputs": [
 
 
 
 
 
 
 
 
 
9
  {
10
  "data": {
11
  "text/html": [
@@ -83,7 +92,7 @@
83
  "4 When healing from mental illness, early identi... "
84
  ]
85
  },
86
- "execution_count": 8,
87
  "metadata": {},
88
  "output_type": "execute_result"
89
  }
@@ -96,8 +105,11 @@
96
  "import pandas as pd\n",
97
  "import numpy as np\n",
98
  "import torch\n",
99
- "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
100
- "from huggingface_hub import notebook_login\n",
 
 
 
101
  "# notebook_login()\n",
102
  "# Step 1: Collect and preprocess data\n",
103
  "# Get all the questions from Questions column and responses from Questions column in the dataset data.csv\n",
@@ -129,7 +141,7 @@
129
  },
130
  {
131
  "cell_type": "code",
132
- "execution_count": 9,
133
  "id": "8f51e39d",
134
  "metadata": {},
135
  "outputs": [
@@ -150,7 +162,7 @@
150
  },
151
  {
152
  "cell_type": "code",
153
- "execution_count": 10,
154
  "id": "1d697a39",
155
  "metadata": {},
156
  "outputs": [
@@ -178,7 +190,7 @@
178
  },
179
  {
180
  "cell_type": "code",
181
- "execution_count": 12,
182
  "id": "c5dde0e4",
183
  "metadata": {},
184
  "outputs": [
@@ -202,7 +214,10 @@
202
  "# \"You can manage your anxiety through techniques such as deep breathing, meditation, and therapy.\",\n",
203
  "# \"Treatments for bipolar disorder include medication, therapy, and lifestyle changes.\"]\n",
204
  "\n",
205
- "vectorizer = TfidfVectorizer()\n",
 
 
 
206
  "X = vectorizer.fit_transform(questions)\n",
207
  "y = responses\n",
208
  "\n",
@@ -210,14 +225,15 @@
210
  "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
211
  "\n",
212
  "# Step 3: Choose a machine learning algorithm\n",
213
- "model = LogisticRegression()\n",
 
214
  "\n",
215
  "# Step 4: Train the model\n",
216
  "model.fit(X_train, y_train)\n",
217
  "\n",
218
  "# model.push_to_hub(\"tabibu-ai/mental-health-chatbot\")\n",
219
- "pt_model = DistilBertForSequenceClassification.from_pretrained(\"model.ipynb\", from_tf=True)\n",
220
- "pt_model.save_pretrained(\"model.ipynb\")\n",
221
  "# load model from hub\n",
222
  "\n",
223
  "# Step 5: Evaluate the model\n",
@@ -231,36 +247,20 @@
231
  },
232
  {
233
  "cell_type": "code",
234
- "execution_count": 18,
235
  "id": "14406312",
236
  "metadata": {},
237
- "outputs": [
238
- {
239
- "name": "stdout",
240
- "output_type": "stream",
241
- "text": [
242
- "Ask me anything : I feel sad\n"
243
- ]
244
- }
245
- ],
246
  "source": [
247
- "new_question = input(\"Ask me anything : \")\n"
248
  ]
249
  },
250
  {
251
  "cell_type": "code",
252
- "execution_count": 17,
253
  "id": "6b9198db",
254
  "metadata": {},
255
- "outputs": [
256
- {
257
- "name": "stdout",
258
- "output_type": "stream",
259
- "text": [
260
- "Prediction: ['\"It is estimated that mental illness affects 1 in 5 adults in America']\n"
261
- ]
262
- }
263
- ],
264
  "source": [
265
  "new_question_vector = vectorizer.transform([new_question])\n",
266
  "prediction = model.predict(new_question_vector)\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "id": "ace57031",
7
  "metadata": {},
8
  "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "2023-02-21 17:41:49.330107: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n",
14
+ "2023-02-21 17:41:49.330992: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n",
15
+ "2023-02-21 17:41:49.331010: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n"
16
+ ]
17
+ },
18
  {
19
  "data": {
20
  "text/html": [
 
92
  "4 When healing from mental illness, early identi... "
93
  ]
94
  },
95
+ "execution_count": 1,
96
  "metadata": {},
97
  "output_type": "execute_result"
98
  }
 
105
  "import pandas as pd\n",
106
  "import numpy as np\n",
107
  "import torch\n",
108
+ "import pickle\n",
109
+ "\n",
110
+ "# from transformers import AutoTokenizer, AutoModelForSequenceClassification,BertTokenizer, TFBertForSequenceClassification\n",
111
+ "# from huggingface_hub import notebook_login\n",
112
+ "import tensorflow as tf\n",
113
  "# notebook_login()\n",
114
  "# Step 1: Collect and preprocess data\n",
115
  "# Get all the questions from Questions column and responses from Questions column in the dataset data.csv\n",
 
141
  },
142
  {
143
  "cell_type": "code",
144
+ "execution_count": 8,
145
  "id": "8f51e39d",
146
  "metadata": {},
147
  "outputs": [
 
162
  },
163
  {
164
  "cell_type": "code",
165
+ "execution_count": 9,
166
  "id": "1d697a39",
167
  "metadata": {},
168
  "outputs": [
 
190
  },
191
  {
192
  "cell_type": "code",
193
+ "execution_count": 11,
194
  "id": "c5dde0e4",
195
  "metadata": {},
196
  "outputs": [
 
214
  "# \"You can manage your anxiety through techniques such as deep breathing, meditation, and therapy.\",\n",
215
  "# \"Treatments for bipolar disorder include medication, therapy, and lifestyle changes.\"]\n",
216
  "\n",
217
+ "\n",
218
+ "\n",
219
+ "# vectorizer = TfidfVectorizer()\n",
220
+ "vectorizer = pickle.load(open(\"vectorizer.pkl\", \"rb\"))\n",
221
  "X = vectorizer.fit_transform(questions)\n",
222
  "y = responses\n",
223
  "\n",
 
225
  "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
226
  "\n",
227
  "# Step 3: Choose a machine learning algorithm\n",
228
+ "# model = LogisticRegression()\n",
229
+ "model = pickle.load(open(\"model.pkl\", \"rb\"))\n",
230
  "\n",
231
  "# Step 4: Train the model\n",
232
  "model.fit(X_train, y_train)\n",
233
  "\n",
234
  "# model.push_to_hub(\"tabibu-ai/mental-health-chatbot\")\n",
235
+ "# pt_model = DistilBertForSequenceClassification.from_pretrained(\"model.ipynb\", from_tf=True)\n",
236
+ "# pt_model.save_pretrained(\"model.pt\")\n",
237
  "# load model from hub\n",
238
  "\n",
239
  "# Step 5: Evaluate the model\n",
 
247
  },
248
  {
249
  "cell_type": "code",
250
+ "execution_count": 12,
251
  "id": "14406312",
252
  "metadata": {},
253
+ "outputs": [],
 
 
 
 
 
 
 
 
254
  "source": [
255
+ "new_question = \"What are the symptoms of depression?\"\n"
256
  ]
257
  },
258
  {
259
  "cell_type": "code",
260
+ "execution_count": null,
261
  "id": "6b9198db",
262
  "metadata": {},
263
+ "outputs": [],
 
 
 
 
 
 
 
 
264
  "source": [
265
  "new_question_vector = vectorizer.transform([new_question])\n",
266
  "prediction = model.predict(new_question_vector)\n",
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b1f64b718871770f1106973cc217f454b421b8f8a78daa12c743b43878448cf
3
+ size 2118910
vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa0a144d5a7bac919c542bc480830f32bc427b541482de601ad9c87dd095c976
3
+ size 26406