karanzrk commited on
Commit
c58b615
·
1 Parent(s): 930af4a

Added Training Script

Browse files
Files changed (1) hide show
  1. training_GPT.ipynb +1823 -0
training_GPT.ipynb ADDED
@@ -0,0 +1,1823 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU",
17
+ "widgets": {
18
+ "application/vnd.jupyter.widget-state+json": {
19
+ "1256055feca84c27b93af1ed7a828b9e": {
20
+ "model_module": "@jupyter-widgets/controls",
21
+ "model_name": "HBoxModel",
22
+ "model_module_version": "1.5.0",
23
+ "state": {
24
+ "_dom_classes": [],
25
+ "_model_module": "@jupyter-widgets/controls",
26
+ "_model_module_version": "1.5.0",
27
+ "_model_name": "HBoxModel",
28
+ "_view_count": null,
29
+ "_view_module": "@jupyter-widgets/controls",
30
+ "_view_module_version": "1.5.0",
31
+ "_view_name": "HBoxView",
32
+ "box_style": "",
33
+ "children": [
34
+ "IPY_MODEL_f772925eecb044f28d0f932ec0c00882",
35
+ "IPY_MODEL_810b09d05a6547278a41d0e4a940f8a2",
36
+ "IPY_MODEL_22eaa7679d9f42dc9257f76e7271408c"
37
+ ],
38
+ "layout": "IPY_MODEL_a3395bf7baf34bc78547a1562083da55"
39
+ }
40
+ },
41
+ "f772925eecb044f28d0f932ec0c00882": {
42
+ "model_module": "@jupyter-widgets/controls",
43
+ "model_name": "HTMLModel",
44
+ "model_module_version": "1.5.0",
45
+ "state": {
46
+ "_dom_classes": [],
47
+ "_model_module": "@jupyter-widgets/controls",
48
+ "_model_module_version": "1.5.0",
49
+ "_model_name": "HTMLModel",
50
+ "_view_count": null,
51
+ "_view_module": "@jupyter-widgets/controls",
52
+ "_view_module_version": "1.5.0",
53
+ "_view_name": "HTMLView",
54
+ "description": "",
55
+ "description_tooltip": null,
56
+ "layout": "IPY_MODEL_54086bb13def472bb0f72d4b978847e8",
57
+ "placeholder": "​",
58
+ "style": "IPY_MODEL_56a704b26e754480a52dbea6a27b4d2c",
59
+ "value": "Map: 100%"
60
+ }
61
+ },
62
+ "810b09d05a6547278a41d0e4a940f8a2": {
63
+ "model_module": "@jupyter-widgets/controls",
64
+ "model_name": "FloatProgressModel",
65
+ "model_module_version": "1.5.0",
66
+ "state": {
67
+ "_dom_classes": [],
68
+ "_model_module": "@jupyter-widgets/controls",
69
+ "_model_module_version": "1.5.0",
70
+ "_model_name": "FloatProgressModel",
71
+ "_view_count": null,
72
+ "_view_module": "@jupyter-widgets/controls",
73
+ "_view_module_version": "1.5.0",
74
+ "_view_name": "ProgressView",
75
+ "bar_style": "success",
76
+ "description": "",
77
+ "description_tooltip": null,
78
+ "layout": "IPY_MODEL_f61f2cffba5b4fc59b384dc6966449d2",
79
+ "max": 1148,
80
+ "min": 0,
81
+ "orientation": "horizontal",
82
+ "style": "IPY_MODEL_321469b5621f46e1bd4d0008340f2acc",
83
+ "value": 1148
84
+ }
85
+ },
86
+ "22eaa7679d9f42dc9257f76e7271408c": {
87
+ "model_module": "@jupyter-widgets/controls",
88
+ "model_name": "HTMLModel",
89
+ "model_module_version": "1.5.0",
90
+ "state": {
91
+ "_dom_classes": [],
92
+ "_model_module": "@jupyter-widgets/controls",
93
+ "_model_module_version": "1.5.0",
94
+ "_model_name": "HTMLModel",
95
+ "_view_count": null,
96
+ "_view_module": "@jupyter-widgets/controls",
97
+ "_view_module_version": "1.5.0",
98
+ "_view_name": "HTMLView",
99
+ "description": "",
100
+ "description_tooltip": null,
101
+ "layout": "IPY_MODEL_96dd7bd98af240e5a0a8398883253cad",
102
+ "placeholder": "​",
103
+ "style": "IPY_MODEL_bb18e321dc274d64931498b11fde1698",
104
+ "value": " 1148/1148 [00:01<00:00, 611.37 examples/s]"
105
+ }
106
+ },
107
+ "a3395bf7baf34bc78547a1562083da55": {
108
+ "model_module": "@jupyter-widgets/base",
109
+ "model_name": "LayoutModel",
110
+ "model_module_version": "1.2.0",
111
+ "state": {
112
+ "_model_module": "@jupyter-widgets/base",
113
+ "_model_module_version": "1.2.0",
114
+ "_model_name": "LayoutModel",
115
+ "_view_count": null,
116
+ "_view_module": "@jupyter-widgets/base",
117
+ "_view_module_version": "1.2.0",
118
+ "_view_name": "LayoutView",
119
+ "align_content": null,
120
+ "align_items": null,
121
+ "align_self": null,
122
+ "border": null,
123
+ "bottom": null,
124
+ "display": null,
125
+ "flex": null,
126
+ "flex_flow": null,
127
+ "grid_area": null,
128
+ "grid_auto_columns": null,
129
+ "grid_auto_flow": null,
130
+ "grid_auto_rows": null,
131
+ "grid_column": null,
132
+ "grid_gap": null,
133
+ "grid_row": null,
134
+ "grid_template_areas": null,
135
+ "grid_template_columns": null,
136
+ "grid_template_rows": null,
137
+ "height": null,
138
+ "justify_content": null,
139
+ "justify_items": null,
140
+ "left": null,
141
+ "margin": null,
142
+ "max_height": null,
143
+ "max_width": null,
144
+ "min_height": null,
145
+ "min_width": null,
146
+ "object_fit": null,
147
+ "object_position": null,
148
+ "order": null,
149
+ "overflow": null,
150
+ "overflow_x": null,
151
+ "overflow_y": null,
152
+ "padding": null,
153
+ "right": null,
154
+ "top": null,
155
+ "visibility": null,
156
+ "width": null
157
+ }
158
+ },
159
+ "54086bb13def472bb0f72d4b978847e8": {
160
+ "model_module": "@jupyter-widgets/base",
161
+ "model_name": "LayoutModel",
162
+ "model_module_version": "1.2.0",
163
+ "state": {
164
+ "_model_module": "@jupyter-widgets/base",
165
+ "_model_module_version": "1.2.0",
166
+ "_model_name": "LayoutModel",
167
+ "_view_count": null,
168
+ "_view_module": "@jupyter-widgets/base",
169
+ "_view_module_version": "1.2.0",
170
+ "_view_name": "LayoutView",
171
+ "align_content": null,
172
+ "align_items": null,
173
+ "align_self": null,
174
+ "border": null,
175
+ "bottom": null,
176
+ "display": null,
177
+ "flex": null,
178
+ "flex_flow": null,
179
+ "grid_area": null,
180
+ "grid_auto_columns": null,
181
+ "grid_auto_flow": null,
182
+ "grid_auto_rows": null,
183
+ "grid_column": null,
184
+ "grid_gap": null,
185
+ "grid_row": null,
186
+ "grid_template_areas": null,
187
+ "grid_template_columns": null,
188
+ "grid_template_rows": null,
189
+ "height": null,
190
+ "justify_content": null,
191
+ "justify_items": null,
192
+ "left": null,
193
+ "margin": null,
194
+ "max_height": null,
195
+ "max_width": null,
196
+ "min_height": null,
197
+ "min_width": null,
198
+ "object_fit": null,
199
+ "object_position": null,
200
+ "order": null,
201
+ "overflow": null,
202
+ "overflow_x": null,
203
+ "overflow_y": null,
204
+ "padding": null,
205
+ "right": null,
206
+ "top": null,
207
+ "visibility": null,
208
+ "width": null
209
+ }
210
+ },
211
+ "56a704b26e754480a52dbea6a27b4d2c": {
212
+ "model_module": "@jupyter-widgets/controls",
213
+ "model_name": "DescriptionStyleModel",
214
+ "model_module_version": "1.5.0",
215
+ "state": {
216
+ "_model_module": "@jupyter-widgets/controls",
217
+ "_model_module_version": "1.5.0",
218
+ "_model_name": "DescriptionStyleModel",
219
+ "_view_count": null,
220
+ "_view_module": "@jupyter-widgets/base",
221
+ "_view_module_version": "1.2.0",
222
+ "_view_name": "StyleView",
223
+ "description_width": ""
224
+ }
225
+ },
226
+ "f61f2cffba5b4fc59b384dc6966449d2": {
227
+ "model_module": "@jupyter-widgets/base",
228
+ "model_name": "LayoutModel",
229
+ "model_module_version": "1.2.0",
230
+ "state": {
231
+ "_model_module": "@jupyter-widgets/base",
232
+ "_model_module_version": "1.2.0",
233
+ "_model_name": "LayoutModel",
234
+ "_view_count": null,
235
+ "_view_module": "@jupyter-widgets/base",
236
+ "_view_module_version": "1.2.0",
237
+ "_view_name": "LayoutView",
238
+ "align_content": null,
239
+ "align_items": null,
240
+ "align_self": null,
241
+ "border": null,
242
+ "bottom": null,
243
+ "display": null,
244
+ "flex": null,
245
+ "flex_flow": null,
246
+ "grid_area": null,
247
+ "grid_auto_columns": null,
248
+ "grid_auto_flow": null,
249
+ "grid_auto_rows": null,
250
+ "grid_column": null,
251
+ "grid_gap": null,
252
+ "grid_row": null,
253
+ "grid_template_areas": null,
254
+ "grid_template_columns": null,
255
+ "grid_template_rows": null,
256
+ "height": null,
257
+ "justify_content": null,
258
+ "justify_items": null,
259
+ "left": null,
260
+ "margin": null,
261
+ "max_height": null,
262
+ "max_width": null,
263
+ "min_height": null,
264
+ "min_width": null,
265
+ "object_fit": null,
266
+ "object_position": null,
267
+ "order": null,
268
+ "overflow": null,
269
+ "overflow_x": null,
270
+ "overflow_y": null,
271
+ "padding": null,
272
+ "right": null,
273
+ "top": null,
274
+ "visibility": null,
275
+ "width": null
276
+ }
277
+ },
278
+ "321469b5621f46e1bd4d0008340f2acc": {
279
+ "model_module": "@jupyter-widgets/controls",
280
+ "model_name": "ProgressStyleModel",
281
+ "model_module_version": "1.5.0",
282
+ "state": {
283
+ "_model_module": "@jupyter-widgets/controls",
284
+ "_model_module_version": "1.5.0",
285
+ "_model_name": "ProgressStyleModel",
286
+ "_view_count": null,
287
+ "_view_module": "@jupyter-widgets/base",
288
+ "_view_module_version": "1.2.0",
289
+ "_view_name": "StyleView",
290
+ "bar_color": null,
291
+ "description_width": ""
292
+ }
293
+ },
294
+ "96dd7bd98af240e5a0a8398883253cad": {
295
+ "model_module": "@jupyter-widgets/base",
296
+ "model_name": "LayoutModel",
297
+ "model_module_version": "1.2.0",
298
+ "state": {
299
+ "_model_module": "@jupyter-widgets/base",
300
+ "_model_module_version": "1.2.0",
301
+ "_model_name": "LayoutModel",
302
+ "_view_count": null,
303
+ "_view_module": "@jupyter-widgets/base",
304
+ "_view_module_version": "1.2.0",
305
+ "_view_name": "LayoutView",
306
+ "align_content": null,
307
+ "align_items": null,
308
+ "align_self": null,
309
+ "border": null,
310
+ "bottom": null,
311
+ "display": null,
312
+ "flex": null,
313
+ "flex_flow": null,
314
+ "grid_area": null,
315
+ "grid_auto_columns": null,
316
+ "grid_auto_flow": null,
317
+ "grid_auto_rows": null,
318
+ "grid_column": null,
319
+ "grid_gap": null,
320
+ "grid_row": null,
321
+ "grid_template_areas": null,
322
+ "grid_template_columns": null,
323
+ "grid_template_rows": null,
324
+ "height": null,
325
+ "justify_content": null,
326
+ "justify_items": null,
327
+ "left": null,
328
+ "margin": null,
329
+ "max_height": null,
330
+ "max_width": null,
331
+ "min_height": null,
332
+ "min_width": null,
333
+ "object_fit": null,
334
+ "object_position": null,
335
+ "order": null,
336
+ "overflow": null,
337
+ "overflow_x": null,
338
+ "overflow_y": null,
339
+ "padding": null,
340
+ "right": null,
341
+ "top": null,
342
+ "visibility": null,
343
+ "width": null
344
+ }
345
+ },
346
+ "bb18e321dc274d64931498b11fde1698": {
347
+ "model_module": "@jupyter-widgets/controls",
348
+ "model_name": "DescriptionStyleModel",
349
+ "model_module_version": "1.5.0",
350
+ "state": {
351
+ "_model_module": "@jupyter-widgets/controls",
352
+ "_model_module_version": "1.5.0",
353
+ "_model_name": "DescriptionStyleModel",
354
+ "_view_count": null,
355
+ "_view_module": "@jupyter-widgets/base",
356
+ "_view_module_version": "1.2.0",
357
+ "_view_name": "StyleView",
358
+ "description_width": ""
359
+ }
360
+ },
361
+ "97cd971a20ca4510aa190fe9abdbbc03": {
362
+ "model_module": "@jupyter-widgets/controls",
363
+ "model_name": "HBoxModel",
364
+ "model_module_version": "1.5.0",
365
+ "state": {
366
+ "_dom_classes": [],
367
+ "_model_module": "@jupyter-widgets/controls",
368
+ "_model_module_version": "1.5.0",
369
+ "_model_name": "HBoxModel",
370
+ "_view_count": null,
371
+ "_view_module": "@jupyter-widgets/controls",
372
+ "_view_module_version": "1.5.0",
373
+ "_view_name": "HBoxView",
374
+ "box_style": "",
375
+ "children": [
376
+ "IPY_MODEL_5f1a9f6d14074f1fb43043f6df94b510",
377
+ "IPY_MODEL_269ab7a4f1064425a241faf4ebaf89fb",
378
+ "IPY_MODEL_72307cabb4ac4b109a69fdc40932f0ce"
379
+ ],
380
+ "layout": "IPY_MODEL_2fe091b9cc8d4bc89cba8938772db6a5"
381
+ }
382
+ },
383
+ "5f1a9f6d14074f1fb43043f6df94b510": {
384
+ "model_module": "@jupyter-widgets/controls",
385
+ "model_name": "HTMLModel",
386
+ "model_module_version": "1.5.0",
387
+ "state": {
388
+ "_dom_classes": [],
389
+ "_model_module": "@jupyter-widgets/controls",
390
+ "_model_module_version": "1.5.0",
391
+ "_model_name": "HTMLModel",
392
+ "_view_count": null,
393
+ "_view_module": "@jupyter-widgets/controls",
394
+ "_view_module_version": "1.5.0",
395
+ "_view_name": "HTMLView",
396
+ "description": "",
397
+ "description_tooltip": null,
398
+ "layout": "IPY_MODEL_7e28289b7a9b492985edebeb65b12506",
399
+ "placeholder": "​",
400
+ "style": "IPY_MODEL_ef59d65476a94c719432e29b33ec3ed4",
401
+ "value": "Map: 100%"
402
+ }
403
+ },
404
+ "269ab7a4f1064425a241faf4ebaf89fb": {
405
+ "model_module": "@jupyter-widgets/controls",
406
+ "model_name": "FloatProgressModel",
407
+ "model_module_version": "1.5.0",
408
+ "state": {
409
+ "_dom_classes": [],
410
+ "_model_module": "@jupyter-widgets/controls",
411
+ "_model_module_version": "1.5.0",
412
+ "_model_name": "FloatProgressModel",
413
+ "_view_count": null,
414
+ "_view_module": "@jupyter-widgets/controls",
415
+ "_view_module_version": "1.5.0",
416
+ "_view_name": "ProgressView",
417
+ "bar_style": "success",
418
+ "description": "",
419
+ "description_tooltip": null,
420
+ "layout": "IPY_MODEL_2da3299dcff549e6aace00a8f352ca81",
421
+ "max": 287,
422
+ "min": 0,
423
+ "orientation": "horizontal",
424
+ "style": "IPY_MODEL_c33c0ec5df094342aa061d37a5a77756",
425
+ "value": 287
426
+ }
427
+ },
428
+ "72307cabb4ac4b109a69fdc40932f0ce": {
429
+ "model_module": "@jupyter-widgets/controls",
430
+ "model_name": "HTMLModel",
431
+ "model_module_version": "1.5.0",
432
+ "state": {
433
+ "_dom_classes": [],
434
+ "_model_module": "@jupyter-widgets/controls",
435
+ "_model_module_version": "1.5.0",
436
+ "_model_name": "HTMLModel",
437
+ "_view_count": null,
438
+ "_view_module": "@jupyter-widgets/controls",
439
+ "_view_module_version": "1.5.0",
440
+ "_view_name": "HTMLView",
441
+ "description": "",
442
+ "description_tooltip": null,
443
+ "layout": "IPY_MODEL_407ae2a1bc6641e2a3d008542686eacd",
444
+ "placeholder": "​",
445
+ "style": "IPY_MODEL_d5d921a893a64c0f98ab81d1db621be5",
446
+ "value": " 287/287 [00:00<00:00, 335.23 examples/s]"
447
+ }
448
+ },
449
+ "2fe091b9cc8d4bc89cba8938772db6a5": {
450
+ "model_module": "@jupyter-widgets/base",
451
+ "model_name": "LayoutModel",
452
+ "model_module_version": "1.2.0",
453
+ "state": {
454
+ "_model_module": "@jupyter-widgets/base",
455
+ "_model_module_version": "1.2.0",
456
+ "_model_name": "LayoutModel",
457
+ "_view_count": null,
458
+ "_view_module": "@jupyter-widgets/base",
459
+ "_view_module_version": "1.2.0",
460
+ "_view_name": "LayoutView",
461
+ "align_content": null,
462
+ "align_items": null,
463
+ "align_self": null,
464
+ "border": null,
465
+ "bottom": null,
466
+ "display": null,
467
+ "flex": null,
468
+ "flex_flow": null,
469
+ "grid_area": null,
470
+ "grid_auto_columns": null,
471
+ "grid_auto_flow": null,
472
+ "grid_auto_rows": null,
473
+ "grid_column": null,
474
+ "grid_gap": null,
475
+ "grid_row": null,
476
+ "grid_template_areas": null,
477
+ "grid_template_columns": null,
478
+ "grid_template_rows": null,
479
+ "height": null,
480
+ "justify_content": null,
481
+ "justify_items": null,
482
+ "left": null,
483
+ "margin": null,
484
+ "max_height": null,
485
+ "max_width": null,
486
+ "min_height": null,
487
+ "min_width": null,
488
+ "object_fit": null,
489
+ "object_position": null,
490
+ "order": null,
491
+ "overflow": null,
492
+ "overflow_x": null,
493
+ "overflow_y": null,
494
+ "padding": null,
495
+ "right": null,
496
+ "top": null,
497
+ "visibility": null,
498
+ "width": null
499
+ }
500
+ },
501
+ "7e28289b7a9b492985edebeb65b12506": {
502
+ "model_module": "@jupyter-widgets/base",
503
+ "model_name": "LayoutModel",
504
+ "model_module_version": "1.2.0",
505
+ "state": {
506
+ "_model_module": "@jupyter-widgets/base",
507
+ "_model_module_version": "1.2.0",
508
+ "_model_name": "LayoutModel",
509
+ "_view_count": null,
510
+ "_view_module": "@jupyter-widgets/base",
511
+ "_view_module_version": "1.2.0",
512
+ "_view_name": "LayoutView",
513
+ "align_content": null,
514
+ "align_items": null,
515
+ "align_self": null,
516
+ "border": null,
517
+ "bottom": null,
518
+ "display": null,
519
+ "flex": null,
520
+ "flex_flow": null,
521
+ "grid_area": null,
522
+ "grid_auto_columns": null,
523
+ "grid_auto_flow": null,
524
+ "grid_auto_rows": null,
525
+ "grid_column": null,
526
+ "grid_gap": null,
527
+ "grid_row": null,
528
+ "grid_template_areas": null,
529
+ "grid_template_columns": null,
530
+ "grid_template_rows": null,
531
+ "height": null,
532
+ "justify_content": null,
533
+ "justify_items": null,
534
+ "left": null,
535
+ "margin": null,
536
+ "max_height": null,
537
+ "max_width": null,
538
+ "min_height": null,
539
+ "min_width": null,
540
+ "object_fit": null,
541
+ "object_position": null,
542
+ "order": null,
543
+ "overflow": null,
544
+ "overflow_x": null,
545
+ "overflow_y": null,
546
+ "padding": null,
547
+ "right": null,
548
+ "top": null,
549
+ "visibility": null,
550
+ "width": null
551
+ }
552
+ },
553
+ "ef59d65476a94c719432e29b33ec3ed4": {
554
+ "model_module": "@jupyter-widgets/controls",
555
+ "model_name": "DescriptionStyleModel",
556
+ "model_module_version": "1.5.0",
557
+ "state": {
558
+ "_model_module": "@jupyter-widgets/controls",
559
+ "_model_module_version": "1.5.0",
560
+ "_model_name": "DescriptionStyleModel",
561
+ "_view_count": null,
562
+ "_view_module": "@jupyter-widgets/base",
563
+ "_view_module_version": "1.2.0",
564
+ "_view_name": "StyleView",
565
+ "description_width": ""
566
+ }
567
+ },
568
+ "2da3299dcff549e6aace00a8f352ca81": {
569
+ "model_module": "@jupyter-widgets/base",
570
+ "model_name": "LayoutModel",
571
+ "model_module_version": "1.2.0",
572
+ "state": {
573
+ "_model_module": "@jupyter-widgets/base",
574
+ "_model_module_version": "1.2.0",
575
+ "_model_name": "LayoutModel",
576
+ "_view_count": null,
577
+ "_view_module": "@jupyter-widgets/base",
578
+ "_view_module_version": "1.2.0",
579
+ "_view_name": "LayoutView",
580
+ "align_content": null,
581
+ "align_items": null,
582
+ "align_self": null,
583
+ "border": null,
584
+ "bottom": null,
585
+ "display": null,
586
+ "flex": null,
587
+ "flex_flow": null,
588
+ "grid_area": null,
589
+ "grid_auto_columns": null,
590
+ "grid_auto_flow": null,
591
+ "grid_auto_rows": null,
592
+ "grid_column": null,
593
+ "grid_gap": null,
594
+ "grid_row": null,
595
+ "grid_template_areas": null,
596
+ "grid_template_columns": null,
597
+ "grid_template_rows": null,
598
+ "height": null,
599
+ "justify_content": null,
600
+ "justify_items": null,
601
+ "left": null,
602
+ "margin": null,
603
+ "max_height": null,
604
+ "max_width": null,
605
+ "min_height": null,
606
+ "min_width": null,
607
+ "object_fit": null,
608
+ "object_position": null,
609
+ "order": null,
610
+ "overflow": null,
611
+ "overflow_x": null,
612
+ "overflow_y": null,
613
+ "padding": null,
614
+ "right": null,
615
+ "top": null,
616
+ "visibility": null,
617
+ "width": null
618
+ }
619
+ },
620
+ "c33c0ec5df094342aa061d37a5a77756": {
621
+ "model_module": "@jupyter-widgets/controls",
622
+ "model_name": "ProgressStyleModel",
623
+ "model_module_version": "1.5.0",
624
+ "state": {
625
+ "_model_module": "@jupyter-widgets/controls",
626
+ "_model_module_version": "1.5.0",
627
+ "_model_name": "ProgressStyleModel",
628
+ "_view_count": null,
629
+ "_view_module": "@jupyter-widgets/base",
630
+ "_view_module_version": "1.2.0",
631
+ "_view_name": "StyleView",
632
+ "bar_color": null,
633
+ "description_width": ""
634
+ }
635
+ },
636
+ "407ae2a1bc6641e2a3d008542686eacd": {
637
+ "model_module": "@jupyter-widgets/base",
638
+ "model_name": "LayoutModel",
639
+ "model_module_version": "1.2.0",
640
+ "state": {
641
+ "_model_module": "@jupyter-widgets/base",
642
+ "_model_module_version": "1.2.0",
643
+ "_model_name": "LayoutModel",
644
+ "_view_count": null,
645
+ "_view_module": "@jupyter-widgets/base",
646
+ "_view_module_version": "1.2.0",
647
+ "_view_name": "LayoutView",
648
+ "align_content": null,
649
+ "align_items": null,
650
+ "align_self": null,
651
+ "border": null,
652
+ "bottom": null,
653
+ "display": null,
654
+ "flex": null,
655
+ "flex_flow": null,
656
+ "grid_area": null,
657
+ "grid_auto_columns": null,
658
+ "grid_auto_flow": null,
659
+ "grid_auto_rows": null,
660
+ "grid_column": null,
661
+ "grid_gap": null,
662
+ "grid_row": null,
663
+ "grid_template_areas": null,
664
+ "grid_template_columns": null,
665
+ "grid_template_rows": null,
666
+ "height": null,
667
+ "justify_content": null,
668
+ "justify_items": null,
669
+ "left": null,
670
+ "margin": null,
671
+ "max_height": null,
672
+ "max_width": null,
673
+ "min_height": null,
674
+ "min_width": null,
675
+ "object_fit": null,
676
+ "object_position": null,
677
+ "order": null,
678
+ "overflow": null,
679
+ "overflow_x": null,
680
+ "overflow_y": null,
681
+ "padding": null,
682
+ "right": null,
683
+ "top": null,
684
+ "visibility": null,
685
+ "width": null
686
+ }
687
+ },
688
+ "d5d921a893a64c0f98ab81d1db621be5": {
689
+ "model_module": "@jupyter-widgets/controls",
690
+ "model_name": "DescriptionStyleModel",
691
+ "model_module_version": "1.5.0",
692
+ "state": {
693
+ "_model_module": "@jupyter-widgets/controls",
694
+ "_model_module_version": "1.5.0",
695
+ "_model_name": "DescriptionStyleModel",
696
+ "_view_count": null,
697
+ "_view_module": "@jupyter-widgets/base",
698
+ "_view_module_version": "1.2.0",
699
+ "_view_name": "StyleView",
700
+ "description_width": ""
701
+ }
702
+ }
703
+ }
704
+ }
705
+ },
706
+ "cells": [
707
+ {
708
+ "cell_type": "code",
709
+ "execution_count": null,
710
+ "metadata": {
711
+ "colab": {
712
+ "base_uri": "https://localhost:8080/"
713
+ },
714
+ "id": "SRajt-tUH3ms",
715
+ "outputId": "90376d9c-68c7-4672-b07c-6f63ff2a4ad7"
716
+ },
717
+ "outputs": [
718
+ {
719
+ "output_type": "stream",
720
+ "name": "stdout",
721
+ "text": [
722
+ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.31.0)\n",
723
+ "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (2.14.4)\n",
724
+ "Requirement already satisfied: evaluate in /usr/local/lib/python3.10/dist-packages (0.4.0)\n",
725
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n",
726
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.16.4)\n",
727
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n",
728
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n",
729
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n",
730
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n",
731
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n",
732
+ "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n",
733
+ "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.3.2)\n",
734
+ "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n",
735
+ "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n",
736
+ "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.7)\n",
737
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n",
738
+ "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.3.0)\n",
739
+ "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.15)\n",
740
+ "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n",
741
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.8.5)\n",
742
+ "Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.10/dist-packages (from evaluate) (0.18.0)\n",
743
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n",
744
+ "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (3.2.0)\n",
745
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n",
746
+ "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
747
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.2)\n",
748
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n",
749
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
750
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.7.1)\n",
751
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n",
752
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.4)\n",
753
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n",
754
+ "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
755
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3)\n",
756
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n"
757
+ ]
758
+ }
759
+ ],
760
+ "source": [
761
+ "! pip install transformers datasets evaluate"
762
+ ]
763
+ },
764
+ {
765
+ "cell_type": "code",
766
+ "source": [
767
+ "! pip install transformers[sentencepiece]"
768
+ ],
769
+ "metadata": {
770
+ "colab": {
771
+ "base_uri": "https://localhost:8080/"
772
+ },
773
+ "id": "PgxEJq43QVaS",
774
+ "outputId": "ff72d40c-4904-4b53-f348-954424a5b240"
775
+ },
776
+ "execution_count": null,
777
+ "outputs": [
778
+ {
779
+ "output_type": "stream",
780
+ "name": "stdout",
781
+ "text": [
782
+ "Requirement already satisfied: transformers[sentencepiece] in /usr/local/lib/python3.10/dist-packages (4.31.0)\n",
783
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (3.12.2)\n",
784
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (0.16.4)\n",
785
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (1.23.5)\n",
786
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (23.1)\n",
787
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (6.0.1)\n",
788
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (2023.6.3)\n",
789
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (2.31.0)\n",
790
+ "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (0.13.3)\n",
791
+ "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (0.3.2)\n",
792
+ "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (4.66.1)\n",
793
+ "Requirement already satisfied: sentencepiece!=0.1.92,>=0.1.91 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (0.1.99)\n",
794
+ "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (3.20.3)\n",
795
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers[sentencepiece]) (2023.6.0)\n",
796
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers[sentencepiece]) (4.7.1)\n",
797
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[sentencepiece]) (3.2.0)\n",
798
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[sentencepiece]) (3.4)\n",
799
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[sentencepiece]) (2.0.4)\n",
800
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[sentencepiece]) (2023.7.22)\n"
801
+ ]
802
+ }
803
+ ]
804
+ },
805
+ {
806
+ "cell_type": "code",
807
+ "source": [
808
+ "from transformers import AutoTokenizer\n",
809
+ "\n",
810
+ "tokenizer = AutoTokenizer.from_pretrained(\"openai-gpt\")\n",
811
+ "if tokenizer.pad_token is None:\n",
812
+ " tokenizer.add_special_tokens({'pad_token': '[PAD]'})"
813
+ ],
814
+ "metadata": {
815
+ "id": "rjE6lHHJJdyv",
816
+ "colab": {
817
+ "base_uri": "https://localhost:8080/"
818
+ },
819
+ "outputId": "3baa86d7-6a43-4894-a415-756b614514ff"
820
+ },
821
+ "execution_count": null,
822
+ "outputs": [
823
+ {
824
+ "output_type": "stream",
825
+ "name": "stderr",
826
+ "text": [
827
+ "Using pad_token, but it is not set yet.\n"
828
+ ]
829
+ }
830
+ ]
831
+ },
832
+ {
833
+ "cell_type": "code",
834
+ "source": [
835
+ "import pandas as pd\n",
836
+ "from sklearn.model_selection import train_test_split\n",
837
+ "\n",
838
+ "data = pd.read_csv(\"ielts_writing_dataset_new.csv\")\n",
839
+ "\n",
840
+ "data.label = data.label.replace(1,0)\n",
841
+ "data.label = data.label.replace(3,0)\n",
842
+ "data.label = data.label.replace(3.5,0)\n",
843
+ "data.label = data.label.replace(4,0)\n",
844
+ "data.label = data.label.replace(4.5,0)\n",
845
+ "data.label = data.label.replace(5,0)\n",
846
+ "data.label = data.label.replace(5.5,1)\n",
847
+ "data.label = data.label.replace(6,1)\n",
848
+ "data.label = data.label.replace(6.5,1)\n",
849
+ "data.label = data.label.replace(7,1)\n",
850
+ "data.label = data.label.replace(7.5,1)\n",
851
+ "data.label = data.label.replace(8,2)\n",
852
+ "data.label = data.label.replace(8.5,2)\n",
853
+ "data.label = data.label.replace(9,2)\n",
854
+ "\n",
855
+ "data.label = data.label.astype(int)\n",
856
+ "\n",
857
+ "train, test = train_test_split(data, test_size=0.2)\n"
858
+ ],
859
+ "metadata": {
860
+ "id": "GpD5w5t2JihL"
861
+ },
862
+ "execution_count": null,
863
+ "outputs": []
864
+ },
865
+ {
866
+ "cell_type": "code",
867
+ "source": [
868
+ "data[:10]"
869
+ ],
870
+ "metadata": {
871
+ "colab": {
872
+ "base_uri": "https://localhost:8080/",
873
+ "height": 363
874
+ },
875
+ "id": "Cos-ypQ7n7d9",
876
+ "outputId": "3ed23ed5-1b04-438f-fede-18448588a866"
877
+ },
878
+ "execution_count": null,
879
+ "outputs": [
880
+ {
881
+ "output_type": "execute_result",
882
+ "data": {
883
+ "text/plain": [
884
+ " label text\n",
885
+ "0 1 Between 1995 and 2010, a study was conducted r...\n",
886
+ "1 1 Poverty represents a worldwide crisis. It is t...\n",
887
+ "2 0 The left chart shows the population change hap...\n",
888
+ "3 1 Human beings are facing many challenges nowada...\n",
889
+ "4 1 Information about the thousands of visits from...\n",
890
+ "5 1 Whether countries should only invest facilitie...\n",
891
+ "6 1 This graph depicts the changes in tourists vis...\n",
892
+ "7 1 Sports is an essential part to most of us , so...\n",
893
+ "8 2 The line graph illustrates the number of overs...\n",
894
+ "9 2 International sports events require the most w..."
895
+ ],
896
+ "text/html": [
897
+ "\n",
898
+ " <div id=\"df-e1afc1ea-3f69-4955-a8a4-af02940fef5a\" class=\"colab-df-container\">\n",
899
+ " <div>\n",
900
+ "<style scoped>\n",
901
+ " .dataframe tbody tr th:only-of-type {\n",
902
+ " vertical-align: middle;\n",
903
+ " }\n",
904
+ "\n",
905
+ " .dataframe tbody tr th {\n",
906
+ " vertical-align: top;\n",
907
+ " }\n",
908
+ "\n",
909
+ " .dataframe thead th {\n",
910
+ " text-align: right;\n",
911
+ " }\n",
912
+ "</style>\n",
913
+ "<table border=\"1\" class=\"dataframe\">\n",
914
+ " <thead>\n",
915
+ " <tr style=\"text-align: right;\">\n",
916
+ " <th></th>\n",
917
+ " <th>label</th>\n",
918
+ " <th>text</th>\n",
919
+ " </tr>\n",
920
+ " </thead>\n",
921
+ " <tbody>\n",
922
+ " <tr>\n",
923
+ " <th>0</th>\n",
924
+ " <td>1</td>\n",
925
+ " <td>Between 1995 and 2010, a study was conducted r...</td>\n",
926
+ " </tr>\n",
927
+ " <tr>\n",
928
+ " <th>1</th>\n",
929
+ " <td>1</td>\n",
930
+ " <td>Poverty represents a worldwide crisis. It is t...</td>\n",
931
+ " </tr>\n",
932
+ " <tr>\n",
933
+ " <th>2</th>\n",
934
+ " <td>0</td>\n",
935
+ " <td>The left chart shows the population change hap...</td>\n",
936
+ " </tr>\n",
937
+ " <tr>\n",
938
+ " <th>3</th>\n",
939
+ " <td>1</td>\n",
940
+ " <td>Human beings are facing many challenges nowada...</td>\n",
941
+ " </tr>\n",
942
+ " <tr>\n",
943
+ " <th>4</th>\n",
944
+ " <td>1</td>\n",
945
+ " <td>Information about the thousands of visits from...</td>\n",
946
+ " </tr>\n",
947
+ " <tr>\n",
948
+ " <th>5</th>\n",
949
+ " <td>1</td>\n",
950
+ " <td>Whether countries should only invest facilitie...</td>\n",
951
+ " </tr>\n",
952
+ " <tr>\n",
953
+ " <th>6</th>\n",
954
+ " <td>1</td>\n",
955
+ " <td>This graph depicts the changes in tourists vis...</td>\n",
956
+ " </tr>\n",
957
+ " <tr>\n",
958
+ " <th>7</th>\n",
959
+ " <td>1</td>\n",
960
+ " <td>Sports is an essential part to most of us , so...</td>\n",
961
+ " </tr>\n",
962
+ " <tr>\n",
963
+ " <th>8</th>\n",
964
+ " <td>2</td>\n",
965
+ " <td>The line graph illustrates the number of overs...</td>\n",
966
+ " </tr>\n",
967
+ " <tr>\n",
968
+ " <th>9</th>\n",
969
+ " <td>2</td>\n",
970
+ " <td>International sports events require the most w...</td>\n",
971
+ " </tr>\n",
972
+ " </tbody>\n",
973
+ "</table>\n",
974
+ "</div>\n",
975
+ " <div class=\"colab-df-buttons\">\n",
976
+ "\n",
977
+ " <div class=\"colab-df-container\">\n",
978
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e1afc1ea-3f69-4955-a8a4-af02940fef5a')\"\n",
979
+ " title=\"Convert this dataframe to an interactive table.\"\n",
980
+ " style=\"display:none;\">\n",
981
+ "\n",
982
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
983
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
984
+ " </svg>\n",
985
+ " </button>\n",
986
+ "\n",
987
+ " <style>\n",
988
+ " .colab-df-container {\n",
989
+ " display:flex;\n",
990
+ " gap: 12px;\n",
991
+ " }\n",
992
+ "\n",
993
+ " .colab-df-convert {\n",
994
+ " background-color: #E8F0FE;\n",
995
+ " border: none;\n",
996
+ " border-radius: 50%;\n",
997
+ " cursor: pointer;\n",
998
+ " display: none;\n",
999
+ " fill: #1967D2;\n",
1000
+ " height: 32px;\n",
1001
+ " padding: 0 0 0 0;\n",
1002
+ " width: 32px;\n",
1003
+ " }\n",
1004
+ "\n",
1005
+ " .colab-df-convert:hover {\n",
1006
+ " background-color: #E2EBFA;\n",
1007
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
1008
+ " fill: #174EA6;\n",
1009
+ " }\n",
1010
+ "\n",
1011
+ " .colab-df-buttons div {\n",
1012
+ " margin-bottom: 4px;\n",
1013
+ " }\n",
1014
+ "\n",
1015
+ " [theme=dark] .colab-df-convert {\n",
1016
+ " background-color: #3B4455;\n",
1017
+ " fill: #D2E3FC;\n",
1018
+ " }\n",
1019
+ "\n",
1020
+ " [theme=dark] .colab-df-convert:hover {\n",
1021
+ " background-color: #434B5C;\n",
1022
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
1023
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
1024
+ " fill: #FFFFFF;\n",
1025
+ " }\n",
1026
+ " </style>\n",
1027
+ "\n",
1028
+ " <script>\n",
1029
+ " const buttonEl =\n",
1030
+ " document.querySelector('#df-e1afc1ea-3f69-4955-a8a4-af02940fef5a button.colab-df-convert');\n",
1031
+ " buttonEl.style.display =\n",
1032
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
1033
+ "\n",
1034
+ " async function convertToInteractive(key) {\n",
1035
+ " const element = document.querySelector('#df-e1afc1ea-3f69-4955-a8a4-af02940fef5a');\n",
1036
+ " const dataTable =\n",
1037
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
1038
+ " [key], {});\n",
1039
+ " if (!dataTable) return;\n",
1040
+ "\n",
1041
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
1042
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
1043
+ " + ' to learn more about interactive tables.';\n",
1044
+ " element.innerHTML = '';\n",
1045
+ " dataTable['output_type'] = 'display_data';\n",
1046
+ " await google.colab.output.renderOutput(dataTable, element);\n",
1047
+ " const docLink = document.createElement('div');\n",
1048
+ " docLink.innerHTML = docLinkHtml;\n",
1049
+ " element.appendChild(docLink);\n",
1050
+ " }\n",
1051
+ " </script>\n",
1052
+ " </div>\n",
1053
+ "\n",
1054
+ "\n",
1055
+ "<div id=\"df-e3bf3e3c-13d8-4f36-bcbd-d82762dcd108\">\n",
1056
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-e3bf3e3c-13d8-4f36-bcbd-d82762dcd108')\"\n",
1057
+ " title=\"Suggest charts.\"\n",
1058
+ " style=\"display:none;\">\n",
1059
+ "\n",
1060
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
1061
+ " width=\"24px\">\n",
1062
+ " <g>\n",
1063
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
1064
+ " </g>\n",
1065
+ "</svg>\n",
1066
+ " </button>\n",
1067
+ "\n",
1068
+ "<style>\n",
1069
+ " .colab-df-quickchart {\n",
1070
+ " background-color: #E8F0FE;\n",
1071
+ " border: none;\n",
1072
+ " border-radius: 50%;\n",
1073
+ " cursor: pointer;\n",
1074
+ " display: none;\n",
1075
+ " fill: #1967D2;\n",
1076
+ " height: 32px;\n",
1077
+ " padding: 0 0 0 0;\n",
1078
+ " width: 32px;\n",
1079
+ " }\n",
1080
+ "\n",
1081
+ " .colab-df-quickchart:hover {\n",
1082
+ " background-color: #E2EBFA;\n",
1083
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
1084
+ " fill: #174EA6;\n",
1085
+ " }\n",
1086
+ "\n",
1087
+ " [theme=dark] .colab-df-quickchart {\n",
1088
+ " background-color: #3B4455;\n",
1089
+ " fill: #D2E3FC;\n",
1090
+ " }\n",
1091
+ "\n",
1092
+ " [theme=dark] .colab-df-quickchart:hover {\n",
1093
+ " background-color: #434B5C;\n",
1094
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
1095
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
1096
+ " fill: #FFFFFF;\n",
1097
+ " }\n",
1098
+ "</style>\n",
1099
+ "\n",
1100
+ " <script>\n",
1101
+ " async function quickchart(key) {\n",
1102
+ " const charts = await google.colab.kernel.invokeFunction(\n",
1103
+ " 'suggestCharts', [key], {});\n",
1104
+ " }\n",
1105
+ " (() => {\n",
1106
+ " let quickchartButtonEl =\n",
1107
+ " document.querySelector('#df-e3bf3e3c-13d8-4f36-bcbd-d82762dcd108 button');\n",
1108
+ " quickchartButtonEl.style.display =\n",
1109
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
1110
+ " })();\n",
1111
+ " </script>\n",
1112
+ "</div>\n",
1113
+ " </div>\n",
1114
+ " </div>\n"
1115
+ ]
1116
+ },
1117
+ "metadata": {},
1118
+ "execution_count": 5
1119
+ }
1120
+ ]
1121
+ },
1122
+ {
1123
+ "cell_type": "code",
1124
+ "source": [
1125
+ "import datasets\n",
1126
+ "from datasets import Dataset, DatasetDict\n",
1127
+ "\n",
1128
+ "train = Dataset.from_pandas(train)\n",
1129
+ "test = Dataset.from_pandas(test)\n",
1130
+ "\n",
1131
+ "\n",
1132
+ "dataset = DatasetDict()\n",
1133
+ "\n",
1134
+ "dataset['train'] = train\n",
1135
+ "dataset['test'] = test\n",
1136
+ "dataset = dataset.remove_columns([\"__index_level_0__\"])\n",
1137
+ "dataset"
1138
+ ],
1139
+ "metadata": {
1140
+ "colab": {
1141
+ "base_uri": "https://localhost:8080/"
1142
+ },
1143
+ "id": "Mi7bkZ00L6ZB",
1144
+ "outputId": "55c9e88a-1902-433d-e79c-9d0003e2c359"
1145
+ },
1146
+ "execution_count": null,
1147
+ "outputs": [
1148
+ {
1149
+ "output_type": "execute_result",
1150
+ "data": {
1151
+ "text/plain": [
1152
+ "DatasetDict({\n",
1153
+ " train: Dataset({\n",
1154
+ " features: ['label', 'text'],\n",
1155
+ " num_rows: 1148\n",
1156
+ " })\n",
1157
+ " test: Dataset({\n",
1158
+ " features: ['label', 'text'],\n",
1159
+ " num_rows: 287\n",
1160
+ " })\n",
1161
+ "})"
1162
+ ]
1163
+ },
1164
+ "metadata": {},
1165
+ "execution_count": 6
1166
+ }
1167
+ ]
1168
+ },
1169
+ {
1170
+ "cell_type": "code",
1171
+ "source": [
1172
+ "dataset[\"test\"][0]"
1173
+ ],
1174
+ "metadata": {
1175
+ "colab": {
1176
+ "base_uri": "https://localhost:8080/"
1177
+ },
1178
+ "id": "QGCPOgv5MO1k",
1179
+ "outputId": "7eb26583-9f2a-47c6-8db3-a35829e732b2"
1180
+ },
1181
+ "execution_count": null,
1182
+ "outputs": [
1183
+ {
1184
+ "output_type": "execute_result",
1185
+ "data": {
1186
+ "text/plain": [
1187
+ "{'label': 1,\n",
1188
+ " 'text': 'Information about the thousands of visits from overseas to three different European natural places during 1987 and 2007 is provided in the given line chart.\\nOverall, it can be seen that the number of visitors increased significantly in the three places compared to the initial year. Although, visits to Europeans lakes demostrated more changes over the 20 years than its counterparts.\\nIn more detail, the most steady growth was experienced by the visits to Europeans mountains. For example, from 1987 the number of visitors grew from 20,000 to almost the double 20 years later. Similarly, visits to the coast also rose after a slight fall in 1992, reaching almost twice as much since 1987, with 75,000.\\nThose visiting Europeans lakes subtantially increased over the years from 10 thousand to a peak of 75 thousand in 2002. Despite falling for about 25 thousand in 2007, the visitis to this place remained higher compared to 1987, with 50,000 at the end of the period.'}"
1189
+ ]
1190
+ },
1191
+ "metadata": {},
1192
+ "execution_count": 7
1193
+ }
1194
+ ]
1195
+ },
1196
+ {
1197
+ "cell_type": "code",
1198
+ "source": [
1199
+ "def preprocess_function(examples):\n",
1200
+ " return tokenizer(examples[\"text\"], truncation=True)"
1201
+ ],
1202
+ "metadata": {
1203
+ "id": "z-Q57XYTMWsU"
1204
+ },
1205
+ "execution_count": null,
1206
+ "outputs": []
1207
+ },
1208
+ {
1209
+ "cell_type": "code",
1210
+ "source": [
1211
+ "tokenized_dataset = dataset.map(preprocess_function, batched=True)"
1212
+ ],
1213
+ "metadata": {
1214
+ "colab": {
1215
+ "base_uri": "https://localhost:8080/",
1216
+ "height": 81,
1217
+ "referenced_widgets": [
1218
+ "1256055feca84c27b93af1ed7a828b9e",
1219
+ "f772925eecb044f28d0f932ec0c00882",
1220
+ "810b09d05a6547278a41d0e4a940f8a2",
1221
+ "22eaa7679d9f42dc9257f76e7271408c",
1222
+ "a3395bf7baf34bc78547a1562083da55",
1223
+ "54086bb13def472bb0f72d4b978847e8",
1224
+ "56a704b26e754480a52dbea6a27b4d2c",
1225
+ "f61f2cffba5b4fc59b384dc6966449d2",
1226
+ "321469b5621f46e1bd4d0008340f2acc",
1227
+ "96dd7bd98af240e5a0a8398883253cad",
1228
+ "bb18e321dc274d64931498b11fde1698",
1229
+ "97cd971a20ca4510aa190fe9abdbbc03",
1230
+ "5f1a9f6d14074f1fb43043f6df94b510",
1231
+ "269ab7a4f1064425a241faf4ebaf89fb",
1232
+ "72307cabb4ac4b109a69fdc40932f0ce",
1233
+ "2fe091b9cc8d4bc89cba8938772db6a5",
1234
+ "7e28289b7a9b492985edebeb65b12506",
1235
+ "ef59d65476a94c719432e29b33ec3ed4",
1236
+ "2da3299dcff549e6aace00a8f352ca81",
1237
+ "c33c0ec5df094342aa061d37a5a77756",
1238
+ "407ae2a1bc6641e2a3d008542686eacd",
1239
+ "d5d921a893a64c0f98ab81d1db621be5"
1240
+ ]
1241
+ },
1242
+ "id": "0-Api6H3Mcqc",
1243
+ "outputId": "8910951c-a339-4726-fcfd-08898e956327"
1244
+ },
1245
+ "execution_count": null,
1246
+ "outputs": [
1247
+ {
1248
+ "output_type": "display_data",
1249
+ "data": {
1250
+ "text/plain": [
1251
+ "Map: 0%| | 0/1148 [00:00<?, ? examples/s]"
1252
+ ],
1253
+ "application/vnd.jupyter.widget-view+json": {
1254
+ "version_major": 2,
1255
+ "version_minor": 0,
1256
+ "model_id": "1256055feca84c27b93af1ed7a828b9e"
1257
+ }
1258
+ },
1259
+ "metadata": {}
1260
+ },
1261
+ {
1262
+ "output_type": "display_data",
1263
+ "data": {
1264
+ "text/plain": [
1265
+ "Map: 0%| | 0/287 [00:00<?, ? examples/s]"
1266
+ ],
1267
+ "application/vnd.jupyter.widget-view+json": {
1268
+ "version_major": 2,
1269
+ "version_minor": 0,
1270
+ "model_id": "97cd971a20ca4510aa190fe9abdbbc03"
1271
+ }
1272
+ },
1273
+ "metadata": {}
1274
+ }
1275
+ ]
1276
+ },
1277
+ {
1278
+ "cell_type": "code",
1279
+ "source": [
1280
+ "from transformers import DataCollatorWithPadding\n",
1281
+ "\n",
1282
+ "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)"
1283
+ ],
1284
+ "metadata": {
1285
+ "id": "CMgTijF_MkZ-"
1286
+ },
1287
+ "execution_count": null,
1288
+ "outputs": []
1289
+ },
1290
+ {
1291
+ "cell_type": "code",
1292
+ "source": [
1293
+ "tokenized_dataset['train']"
1294
+ ],
1295
+ "metadata": {
1296
+ "colab": {
1297
+ "base_uri": "https://localhost:8080/"
1298
+ },
1299
+ "id": "pFa_-NPcXQM3",
1300
+ "outputId": "8fbc38fd-1e84-4fc4-f25f-55a5e886c5c3"
1301
+ },
1302
+ "execution_count": null,
1303
+ "outputs": [
1304
+ {
1305
+ "output_type": "execute_result",
1306
+ "data": {
1307
+ "text/plain": [
1308
+ "Dataset({\n",
1309
+ " features: ['label', 'text', 'input_ids', 'attention_mask'],\n",
1310
+ " num_rows: 1148\n",
1311
+ "})"
1312
+ ]
1313
+ },
1314
+ "metadata": {},
1315
+ "execution_count": 11
1316
+ }
1317
+ ]
1318
+ },
1319
+ {
1320
+ "cell_type": "code",
1321
+ "source": [
1322
+ "import evaluate\n",
1323
+ "\n",
1324
+ "accuracy = evaluate.load(\"accuracy\")"
1325
+ ],
1326
+ "metadata": {
1327
+ "id": "zHjByQbVMobe"
1328
+ },
1329
+ "execution_count": null,
1330
+ "outputs": []
1331
+ },
1332
+ {
1333
+ "cell_type": "code",
1334
+ "source": [
1335
+ "import numpy as np\n",
1336
+ "\n",
1337
+ "\n",
1338
+ "def compute_metrics(eval_pred):\n",
1339
+ " predictions, labels = eval_pred\n",
1340
+ " predictions = np.argmax(predictions, axis=1)\n",
1341
+ " return accuracy.compute(predictions=predictions, references=labels)"
1342
+ ],
1343
+ "metadata": {
1344
+ "id": "GQJysWFsMsyR"
1345
+ },
1346
+ "execution_count": null,
1347
+ "outputs": []
1348
+ },
1349
+ {
1350
+ "cell_type": "code",
1351
+ "source": [
1352
+ "# id2label = {0: '1', 1:'3', 2:'3.5', 3:'4', 4:'4.5',5:'5', 6:'5.5', 7:'6', 8:'6.5',9:'7',10:'7.5',11:'8',12:'8.5',13:'9'}\n",
1353
+ "# label2id = {'1':0,'3':1,'3.5':2,'4':3,'4.5':4,'5':5,'5.5':6,'6':7,'6.5':8,'7':9,'7.5':10,'8':11,'8.5':12,'9':13}\n",
1354
+ "id2label = {0:\"Bad\",1:\"Acceptable\",2:\"Excellent\"}\n",
1355
+ "label2id = {\"Bad\":0,\"Acceptable\":1,\"Excellent\":2}\n",
1356
+ "\n"
1357
+ ],
1358
+ "metadata": {
1359
+ "id": "HgDWrzrvMvDW"
1360
+ },
1361
+ "execution_count": null,
1362
+ "outputs": []
1363
+ },
1364
+ {
1365
+ "cell_type": "code",
1366
+ "source": [
1367
+ "# ! pip install -q -U bitsandbytes\n",
1368
+ "# ! pip install -q -U git+https://github.com/huggingface/transformers.git\n",
1369
+ "# ! pip install -q -U git+https://github.com/huggingface/peft.git\n",
1370
+ "# ! pip install -q -U git+https://github.com/huggingface/accelerate.git\n"
1371
+ ],
1372
+ "metadata": {
1373
+ "id": "XssyeGF8Uxc5"
1374
+ },
1375
+ "execution_count": null,
1376
+ "outputs": []
1377
+ },
1378
+ {
1379
+ "cell_type": "code",
1380
+ "source": [
1381
+ "!python -m pip install huggingface_hub\n",
1382
+ "!huggingface-cli login"
1383
+ ],
1384
+ "metadata": {
1385
+ "colab": {
1386
+ "base_uri": "https://localhost:8080/"
1387
+ },
1388
+ "id": "cChMqnAEFvfX",
1389
+ "outputId": "95d83683-e9a2-440d-a8ca-44f08da12fd3"
1390
+ },
1391
+ "execution_count": null,
1392
+ "outputs": [
1393
+ {
1394
+ "output_type": "stream",
1395
+ "name": "stdout",
1396
+ "text": [
1397
+ "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.10/dist-packages (0.16.4)\n",
1398
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (3.12.2)\n",
1399
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (2023.6.0)\n",
1400
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (2.31.0)\n",
1401
+ "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (4.66.1)\n",
1402
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (6.0.1)\n",
1403
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (4.7.1)\n",
1404
+ "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (23.1)\n",
1405
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub) (3.2.0)\n",
1406
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub) (3.4)\n",
1407
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub) (2.0.4)\n",
1408
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub) (2023.7.22)\n",
1409
+ "\n",
1410
+ " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n",
1411
+ " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
1412
+ " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n",
1413
+ " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
1414
+ " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n",
1415
+ " \n",
1416
+ " A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.\n",
1417
+ " Setting a new token will erase the existing one.\n",
1418
+ " To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n",
1419
+ "Token: \n",
1420
+ "Add token as git credential? (Y/n) Y\n",
1421
+ "Token is valid (permission: write).\n",
1422
+ "\u001b[1m\u001b[31mCannot authenticate through git-credential as no helper is defined on your machine.\n",
1423
+ "You might have to re-authenticate when pushing to the Hugging Face Hub.\n",
1424
+ "Run the following command in your terminal in case you want to set the 'store' credential helper as default.\n",
1425
+ "\n",
1426
+ "git config --global credential.helper store\n",
1427
+ "\n",
1428
+ "Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.\u001b[0m\n",
1429
+ "Token has not been saved to git credential helper.\n",
1430
+ "Your token has been saved to /root/.cache/huggingface/token\n",
1431
+ "Login successful\n"
1432
+ ]
1433
+ }
1434
+ ]
1435
+ },
1436
+ {
1437
+ "cell_type": "code",
1438
+ "source": [
1439
+ "from transformers import OpenAIGPTForSequenceClassification, TrainingArguments, Trainer\n",
1440
+ "\n",
1441
+ "model = OpenAIGPTForSequenceClassification.from_pretrained(\"openai-gpt\",num_labels=3, id2label=id2label, label2id=label2id)\n"
1442
+ ],
1443
+ "metadata": {
1444
+ "colab": {
1445
+ "base_uri": "https://localhost:8080/"
1446
+ },
1447
+ "id": "7xaZqPOzOVJP",
1448
+ "outputId": "70b45351-6414-412c-d2de-c68645325680"
1449
+ },
1450
+ "execution_count": null,
1451
+ "outputs": [
1452
+ {
1453
+ "output_type": "stream",
1454
+ "name": "stderr",
1455
+ "text": [
1456
+ "Some weights of OpenAIGPTForSequenceClassification were not initialized from the model checkpoint at openai-gpt and are newly initialized: ['score.weight']\n",
1457
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
1458
+ ]
1459
+ }
1460
+ ]
1461
+ },
1462
+ {
1463
+ "cell_type": "code",
1464
+ "source": [
1465
+ "! pip install transformers[torch]"
1466
+ ],
1467
+ "metadata": {
1468
+ "id": "s7bor4hUOq4q",
1469
+ "colab": {
1470
+ "base_uri": "https://localhost:8080/"
1471
+ },
1472
+ "outputId": "1a68a8e8-1666-4b47-8860-e622891aa24b"
1473
+ },
1474
+ "execution_count": null,
1475
+ "outputs": [
1476
+ {
1477
+ "output_type": "stream",
1478
+ "name": "stdout",
1479
+ "text": [
1480
+ "Requirement already satisfied: transformers[torch] in /usr/local/lib/python3.10/dist-packages (4.31.0)\n",
1481
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (3.12.2)\n",
1482
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.16.4)\n",
1483
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (1.23.5)\n",
1484
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (23.1)\n",
1485
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (6.0.1)\n",
1486
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2023.6.3)\n",
1487
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.31.0)\n",
1488
+ "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.13.3)\n",
1489
+ "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.3.2)\n",
1490
+ "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (4.66.1)\n",
1491
+ "Requirement already satisfied: torch!=1.12.0,>=1.9 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.0.1+cu118)\n",
1492
+ "Requirement already satisfied: accelerate>=0.20.3 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.21.0)\n",
1493
+ "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.20.3->transformers[torch]) (5.9.5)\n",
1494
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers[torch]) (2023.6.0)\n",
1495
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers[torch]) (4.7.1)\n",
1496
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.9->transformers[torch]) (1.12)\n",
1497
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.9->transformers[torch]) (3.1)\n",
1498
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.9->transformers[torch]) (3.1.2)\n",
1499
+ "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.9->transformers[torch]) (2.0.0)\n",
1500
+ "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch!=1.12.0,>=1.9->transformers[torch]) (3.27.2)\n",
1501
+ "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch!=1.12.0,>=1.9->transformers[torch]) (16.0.6)\n",
1502
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.2.0)\n",
1503
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.4)\n",
1504
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (2.0.4)\n",
1505
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (2023.7.22)\n",
1506
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch!=1.12.0,>=1.9->transformers[torch]) (2.1.3)\n",
1507
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch!=1.12.0,>=1.9->transformers[torch]) (1.3.0)\n"
1508
+ ]
1509
+ }
1510
+ ]
1511
+ },
1512
+ {
1513
+ "cell_type": "code",
1514
+ "source": [
1515
+ "# from torch import nn\n",
1516
+ "\n",
1517
+ "# class ClassificationTrainer(Trainer):\n",
1518
+ "# def compute_loss(self, model, inputs, return_outputs=False):\n",
1519
+ "# labels = inputs.get(\"label\")\n",
1520
+ "# outputs = model(**inputs)\n",
1521
+ "# outputs = outputs.unsqueeze(1)\n",
1522
+ "# logits = outputs.get('logits')\n",
1523
+ "# loss_fct = nn.CrossEntropyLoss()\n",
1524
+ "# loss = loss_fct(logits.squeeze(), labels.squeeze())\n",
1525
+ "# return (loss, outputs) if return_outputs else loss"
1526
+ ],
1527
+ "metadata": {
1528
+ "id": "KQ2UskBkU4D9"
1529
+ },
1530
+ "execution_count": null,
1531
+ "outputs": []
1532
+ },
1533
+ {
1534
+ "cell_type": "code",
1535
+ "source": [
1536
+ "training_args = TrainingArguments(\n",
1537
+ " output_dir=\"essayl0\",\n",
1538
+ " learning_rate=2e-5,\n",
1539
+ " per_device_train_batch_size=1,\n",
1540
+ " per_device_eval_batch_size=1,\n",
1541
+ " num_train_epochs=15,\n",
1542
+ " weight_decay=0.01,\n",
1543
+ " evaluation_strategy=\"epoch\",\n",
1544
+ " save_strategy=\"epoch\",\n",
1545
+ " load_best_model_at_end=True,\n",
1546
+ " push_to_hub = True,\n",
1547
+ ")\n",
1548
+ "\n",
1549
+ "trainer = Trainer(\n",
1550
+ " model=model,\n",
1551
+ " args=training_args,\n",
1552
+ " train_dataset=tokenized_dataset[\"train\"],\n",
1553
+ " eval_dataset=tokenized_dataset[\"test\"],\n",
1554
+ " tokenizer=tokenizer,\n",
1555
+ " data_collator=data_collator,\n",
1556
+ " compute_metrics=compute_metrics,\n",
1557
+ ")\n",
1558
+ "\n",
1559
+ "trainer.train()"
1560
+ ],
1561
+ "metadata": {
1562
+ "id": "BwyTlAy0OdRS",
1563
+ "colab": {
1564
+ "base_uri": "https://localhost:8080/",
1565
+ "height": 1000
1566
+ },
1567
+ "outputId": "7b552adc-dce3-469a-abd5-82e841dd0103"
1568
+ },
1569
+ "execution_count": null,
1570
+ "outputs": [
1571
+ {
1572
+ "metadata": {
1573
+ "tags": null
1574
+ },
1575
+ "name": "stderr",
1576
+ "output_type": "stream",
1577
+ "text": [
1578
+ "/content/essayl0 is already a clone of https://huggingface.co/karanzrk/essayl0. Make sure you pull the latest changes with `repo.git_pull()`.\n",
1579
+ "WARNING:huggingface_hub.repository:/content/essayl0 is already a clone of https://huggingface.co/karanzrk/essayl0. Make sure you pull the latest changes with `repo.git_pull()`.\n",
1580
+ "/usr/local/lib/python3.10/dist-packages/transformers/optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
1581
+ " warnings.warn(\n",
1582
+ "You're using a OpenAIGPTTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
1583
+ ]
1584
+ },
1585
+ {
1586
+ "data": {
1587
+ "text/html": [
1588
+ "\n",
1589
+ " <div>\n",
1590
+ " \n",
1591
+ " <progress value='13211' max='17220' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1592
+ " [13211/17220 34:51 < 10:34, 6.32 it/s, Epoch 11.51/15]\n",
1593
+ " </div>\n",
1594
+ " <table border=\"1\" class=\"dataframe\">\n",
1595
+ " <thead>\n",
1596
+ " <tr style=\"text-align: left;\">\n",
1597
+ " <th>Epoch</th>\n",
1598
+ " <th>Training Loss</th>\n",
1599
+ " <th>Validation Loss</th>\n",
1600
+ " <th>Accuracy</th>\n",
1601
+ " </tr>\n",
1602
+ " </thead>\n",
1603
+ " <tbody>\n",
1604
+ " <tr>\n",
1605
+ " <td>1</td>\n",
1606
+ " <td>1.118400</td>\n",
1607
+ " <td>1.142566</td>\n",
1608
+ " <td>0.811847</td>\n",
1609
+ " </tr>\n",
1610
+ " <tr>\n",
1611
+ " <td>2</td>\n",
1612
+ " <td>1.023800</td>\n",
1613
+ " <td>0.884835</td>\n",
1614
+ " <td>0.818815</td>\n",
1615
+ " </tr>\n",
1616
+ " <tr>\n",
1617
+ " <td>3</td>\n",
1618
+ " <td>0.685000</td>\n",
1619
+ " <td>0.902960</td>\n",
1620
+ " <td>0.836237</td>\n",
1621
+ " </tr>\n",
1622
+ " <tr>\n",
1623
+ " <td>4</td>\n",
1624
+ " <td>0.322000</td>\n",
1625
+ " <td>1.255817</td>\n",
1626
+ " <td>0.815331</td>\n",
1627
+ " </tr>\n",
1628
+ " <tr>\n",
1629
+ " <td>5</td>\n",
1630
+ " <td>0.036000</td>\n",
1631
+ " <td>1.604122</td>\n",
1632
+ " <td>0.846690</td>\n",
1633
+ " </tr>\n",
1634
+ " <tr>\n",
1635
+ " <td>6</td>\n",
1636
+ " <td>0.045100</td>\n",
1637
+ " <td>1.756756</td>\n",
1638
+ " <td>0.853659</td>\n",
1639
+ " </tr>\n",
1640
+ " <tr>\n",
1641
+ " <td>7</td>\n",
1642
+ " <td>0.089400</td>\n",
1643
+ " <td>1.627898</td>\n",
1644
+ " <td>0.843206</td>\n",
1645
+ " </tr>\n",
1646
+ " <tr>\n",
1647
+ " <td>8</td>\n",
1648
+ " <td>0.044400</td>\n",
1649
+ " <td>1.700445</td>\n",
1650
+ " <td>0.829268</td>\n",
1651
+ " </tr>\n",
1652
+ " <tr>\n",
1653
+ " <td>9</td>\n",
1654
+ " <td>0.019400</td>\n",
1655
+ " <td>1.871287</td>\n",
1656
+ " <td>0.843206</td>\n",
1657
+ " </tr>\n",
1658
+ " <tr>\n",
1659
+ " <td>10</td>\n",
1660
+ " <td>0.010000</td>\n",
1661
+ " <td>2.007586</td>\n",
1662
+ " <td>0.843206</td>\n",
1663
+ " </tr>\n",
1664
+ " <tr>\n",
1665
+ " <td>11</td>\n",
1666
+ " <td>0.018800</td>\n",
1667
+ " <td>2.013671</td>\n",
1668
+ " <td>0.836237</td>\n",
1669
+ " </tr>\n",
1670
+ " </tbody>\n",
1671
+ "</table><p>"
1672
+ ],
1673
+ "text/plain": [
1674
+ "<IPython.core.display.HTML object>"
1675
+ ]
1676
+ },
1677
+ "metadata": {},
1678
+ "output_type": "display_data"
1679
+ },
1680
+ {
1681
+ "output_type": "display_data",
1682
+ "data": {
1683
+ "text/plain": [
1684
+ "<IPython.core.display.HTML object>"
1685
+ ],
1686
+ "text/html": [
1687
+ "\n",
1688
+ " <div>\n",
1689
+ " \n",
1690
+ " <progress value='17220' max='17220' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1691
+ " [17220/17220 45:41, Epoch 15/15]\n",
1692
+ " </div>\n",
1693
+ " <table border=\"1\" class=\"dataframe\">\n",
1694
+ " <thead>\n",
1695
+ " <tr style=\"text-align: left;\">\n",
1696
+ " <th>Epoch</th>\n",
1697
+ " <th>Training Loss</th>\n",
1698
+ " <th>Validation Loss</th>\n",
1699
+ " <th>Accuracy</th>\n",
1700
+ " </tr>\n",
1701
+ " </thead>\n",
1702
+ " <tbody>\n",
1703
+ " <tr>\n",
1704
+ " <td>1</td>\n",
1705
+ " <td>1.118400</td>\n",
1706
+ " <td>1.142566</td>\n",
1707
+ " <td>0.811847</td>\n",
1708
+ " </tr>\n",
1709
+ " <tr>\n",
1710
+ " <td>2</td>\n",
1711
+ " <td>1.023800</td>\n",
1712
+ " <td>0.884835</td>\n",
1713
+ " <td>0.818815</td>\n",
1714
+ " </tr>\n",
1715
+ " <tr>\n",
1716
+ " <td>3</td>\n",
1717
+ " <td>0.685000</td>\n",
1718
+ " <td>0.902960</td>\n",
1719
+ " <td>0.836237</td>\n",
1720
+ " </tr>\n",
1721
+ " <tr>\n",
1722
+ " <td>4</td>\n",
1723
+ " <td>0.322000</td>\n",
1724
+ " <td>1.255817</td>\n",
1725
+ " <td>0.815331</td>\n",
1726
+ " </tr>\n",
1727
+ " <tr>\n",
1728
+ " <td>5</td>\n",
1729
+ " <td>0.036000</td>\n",
1730
+ " <td>1.604122</td>\n",
1731
+ " <td>0.846690</td>\n",
1732
+ " </tr>\n",
1733
+ " <tr>\n",
1734
+ " <td>6</td>\n",
1735
+ " <td>0.045100</td>\n",
1736
+ " <td>1.756756</td>\n",
1737
+ " <td>0.853659</td>\n",
1738
+ " </tr>\n",
1739
+ " <tr>\n",
1740
+ " <td>7</td>\n",
1741
+ " <td>0.089400</td>\n",
1742
+ " <td>1.627898</td>\n",
1743
+ " <td>0.843206</td>\n",
1744
+ " </tr>\n",
1745
+ " <tr>\n",
1746
+ " <td>8</td>\n",
1747
+ " <td>0.044400</td>\n",
1748
+ " <td>1.700445</td>\n",
1749
+ " <td>0.829268</td>\n",
1750
+ " </tr>\n",
1751
+ " <tr>\n",
1752
+ " <td>9</td>\n",
1753
+ " <td>0.019400</td>\n",
1754
+ " <td>1.871287</td>\n",
1755
+ " <td>0.843206</td>\n",
1756
+ " </tr>\n",
1757
+ " <tr>\n",
1758
+ " <td>10</td>\n",
1759
+ " <td>0.010000</td>\n",
1760
+ " <td>2.007586</td>\n",
1761
+ " <td>0.843206</td>\n",
1762
+ " </tr>\n",
1763
+ " <tr>\n",
1764
+ " <td>11</td>\n",
1765
+ " <td>0.018800</td>\n",
1766
+ " <td>2.013671</td>\n",
1767
+ " <td>0.836237</td>\n",
1768
+ " </tr>\n",
1769
+ " <tr>\n",
1770
+ " <td>12</td>\n",
1771
+ " <td>0.000000</td>\n",
1772
+ " <td>2.092149</td>\n",
1773
+ " <td>0.843206</td>\n",
1774
+ " </tr>\n",
1775
+ " <tr>\n",
1776
+ " <td>13</td>\n",
1777
+ " <td>0.000000</td>\n",
1778
+ " <td>2.133086</td>\n",
1779
+ " <td>0.836237</td>\n",
1780
+ " </tr>\n",
1781
+ " <tr>\n",
1782
+ " <td>14</td>\n",
1783
+ " <td>0.000000</td>\n",
1784
+ " <td>2.149304</td>\n",
1785
+ " <td>0.836237</td>\n",
1786
+ " </tr>\n",
1787
+ " <tr>\n",
1788
+ " <td>15</td>\n",
1789
+ " <td>0.000000</td>\n",
1790
+ " <td>2.165603</td>\n",
1791
+ " <td>0.836237</td>\n",
1792
+ " </tr>\n",
1793
+ " </tbody>\n",
1794
+ "</table><p>"
1795
+ ]
1796
+ },
1797
+ "metadata": {}
1798
+ },
1799
+ {
1800
+ "output_type": "execute_result",
1801
+ "data": {
1802
+ "text/plain": [
1803
+ "TrainOutput(global_step=17220, training_loss=0.23190565678960665, metrics={'train_runtime': 2746.6484, 'train_samples_per_second': 6.269, 'train_steps_per_second': 6.269, 'total_flos': 2652418758988800.0, 'train_loss': 0.23190565678960665, 'epoch': 15.0})"
1804
+ ]
1805
+ },
1806
+ "metadata": {},
1807
+ "execution_count": 20
1808
+ }
1809
+ ]
1810
+ },
1811
+ {
1812
+ "cell_type": "code",
1813
+ "source": [
1814
+ "# !zip -r /content/checkpoint.zip /content/essayl0/checkpoint-1080/"
1815
+ ],
1816
+ "metadata": {
1817
+ "id": "s6wG4purBmfX"
1818
+ },
1819
+ "execution_count": null,
1820
+ "outputs": []
1821
+ }
1822
+ ]
1823
+ }