Ubuntu committed on
Commit
f81b82b
1 Parent(s): 479eb9c

added sentence status

Browse files
Files changed (2) hide show
  1. research/trials.ipynb +414 -37
  2. utils/get_sentence_status.py +2 -2
research/trials.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -11,32 +11,16 @@
11
  },
12
  {
13
  "cell_type": "code",
14
- "execution_count": 2,
15
  "metadata": {},
16
- "outputs": [
17
- {
18
- "name": "stderr",
19
- "output_type": "stream",
20
- "text": [
21
- "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
22
- " from .autonotebook import tqdm as notebook_tqdm\n"
23
- ]
24
- },
25
- {
26
- "name": "stderr",
27
- "output_type": "stream",
28
- "text": [
29
- "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
30
- ]
31
- }
32
- ],
33
  "source": [
34
  "from utils.get_sentence_status import complete_sentence_analysis"
35
  ]
36
  },
37
  {
38
  "cell_type": "code",
39
- "execution_count": 3,
40
  "metadata": {},
41
  "outputs": [
42
  {
@@ -64,7 +48,7 @@
64
  " 'avg_length': 24.166666666666668}"
65
  ]
66
  },
67
- "execution_count": 3,
68
  "metadata": {},
69
  "output_type": "execute_result"
70
  }
@@ -80,7 +64,7 @@
80
  },
81
  {
82
  "cell_type": "code",
83
- "execution_count": 4,
84
  "metadata": {},
85
  "outputs": [
86
  {
@@ -108,7 +92,7 @@
108
  " 'avg_length': 29.285714285714285}"
109
  ]
110
  },
111
- "execution_count": 4,
112
  "metadata": {},
113
  "output_type": "execute_result"
114
  }
@@ -124,7 +108,7 @@
124
  },
125
  {
126
  "cell_type": "code",
127
- "execution_count": 5,
128
  "metadata": {},
129
  "outputs": [
130
  {
@@ -152,7 +136,7 @@
152
  " 'avg_length': 19.625}"
153
  ]
154
  },
155
- "execution_count": 5,
156
  "metadata": {},
157
  "output_type": "execute_result"
158
  }
@@ -167,7 +151,7 @@
167
  },
168
  {
169
  "cell_type": "code",
170
- "execution_count": 6,
171
  "metadata": {},
172
  "outputs": [
173
  {
@@ -195,7 +179,7 @@
195
  " 'avg_length': 15.333333333333334}"
196
  ]
197
  },
198
- "execution_count": 6,
199
  "metadata": {},
200
  "output_type": "execute_result"
201
  }
@@ -212,7 +196,7 @@
212
  },
213
  {
214
  "cell_type": "code",
215
- "execution_count": 7,
216
  "metadata": {},
217
  "outputs": [
218
  {
@@ -240,7 +224,7 @@
240
  " 'avg_length': 16.666666666666668}"
241
  ]
242
  },
243
- "execution_count": 7,
244
  "metadata": {},
245
  "output_type": "execute_result"
246
  }
@@ -256,7 +240,7 @@
256
  },
257
  {
258
  "cell_type": "code",
259
- "execution_count": 8,
260
  "metadata": {},
261
  "outputs": [
262
  {
@@ -284,7 +268,7 @@
284
  " 'avg_length': 17.857142857142858}"
285
  ]
286
  },
287
- "execution_count": 8,
288
  "metadata": {},
289
  "output_type": "execute_result"
290
  }
@@ -305,7 +289,7 @@
305
  },
306
  {
307
  "cell_type": "code",
308
- "execution_count": 9,
309
  "metadata": {},
310
  "outputs": [
311
  {
@@ -333,7 +317,7 @@
333
  " 'avg_length': 26.2}"
334
  ]
335
  },
336
- "execution_count": 9,
337
  "metadata": {},
338
  "output_type": "execute_result"
339
  }
@@ -347,7 +331,7 @@
347
  },
348
  {
349
  "cell_type": "code",
350
- "execution_count": 10,
351
  "metadata": {},
352
  "outputs": [
353
  {
@@ -375,7 +359,7 @@
375
  " 'avg_length': 25.666666666666668}"
376
  ]
377
  },
378
- "execution_count": 10,
379
  "metadata": {},
380
  "output_type": "execute_result"
381
  }
@@ -391,7 +375,7 @@
391
  },
392
  {
393
  "cell_type": "code",
394
- "execution_count": 12,
395
  "metadata": {},
396
  "outputs": [
397
  {
@@ -419,7 +403,7 @@
419
  " 'avg_length': 28.0}"
420
  ]
421
  },
422
- "execution_count": 12,
423
  "metadata": {},
424
  "output_type": "execute_result"
425
  }
@@ -433,6 +417,399 @@
433
  ")"
434
  ]
435
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
  {
437
  "cell_type": "code",
438
  "execution_count": null,
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 13,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
11
  },
12
  {
13
  "cell_type": "code",
14
+ "execution_count": 14,
15
  "metadata": {},
16
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  "source": [
18
  "from utils.get_sentence_status import complete_sentence_analysis"
19
  ]
20
  },
21
  {
22
  "cell_type": "code",
23
+ "execution_count": 15,
24
  "metadata": {},
25
  "outputs": [
26
  {
 
48
  " 'avg_length': 24.166666666666668}"
49
  ]
50
  },
51
+ "execution_count": 15,
52
  "metadata": {},
53
  "output_type": "execute_result"
54
  }
 
64
  },
65
  {
66
  "cell_type": "code",
67
+ "execution_count": 16,
68
  "metadata": {},
69
  "outputs": [
70
  {
 
92
  " 'avg_length': 29.285714285714285}"
93
  ]
94
  },
95
+ "execution_count": 16,
96
  "metadata": {},
97
  "output_type": "execute_result"
98
  }
 
108
  },
109
  {
110
  "cell_type": "code",
111
+ "execution_count": 17,
112
  "metadata": {},
113
  "outputs": [
114
  {
 
136
  " 'avg_length': 19.625}"
137
  ]
138
  },
139
+ "execution_count": 17,
140
  "metadata": {},
141
  "output_type": "execute_result"
142
  }
 
151
  },
152
  {
153
  "cell_type": "code",
154
+ "execution_count": 18,
155
  "metadata": {},
156
  "outputs": [
157
  {
 
179
  " 'avg_length': 15.333333333333334}"
180
  ]
181
  },
182
+ "execution_count": 18,
183
  "metadata": {},
184
  "output_type": "execute_result"
185
  }
 
196
  },
197
  {
198
  "cell_type": "code",
199
+ "execution_count": 19,
200
  "metadata": {},
201
  "outputs": [
202
  {
 
224
  " 'avg_length': 16.666666666666668}"
225
  ]
226
  },
227
+ "execution_count": 19,
228
  "metadata": {},
229
  "output_type": "execute_result"
230
  }
 
240
  },
241
  {
242
  "cell_type": "code",
243
+ "execution_count": 20,
244
  "metadata": {},
245
  "outputs": [
246
  {
 
268
  " 'avg_length': 17.857142857142858}"
269
  ]
270
  },
271
+ "execution_count": 20,
272
  "metadata": {},
273
  "output_type": "execute_result"
274
  }
 
289
  },
290
  {
291
  "cell_type": "code",
292
+ "execution_count": 21,
293
  "metadata": {},
294
  "outputs": [
295
  {
 
317
  " 'avg_length': 26.2}"
318
  ]
319
  },
320
+ "execution_count": 21,
321
  "metadata": {},
322
  "output_type": "execute_result"
323
  }
 
331
  },
332
  {
333
  "cell_type": "code",
334
+ "execution_count": 22,
335
  "metadata": {},
336
  "outputs": [
337
  {
 
359
  " 'avg_length': 25.666666666666668}"
360
  ]
361
  },
362
+ "execution_count": 22,
363
  "metadata": {},
364
  "output_type": "execute_result"
365
  }
 
375
  },
376
  {
377
  "cell_type": "code",
378
+ "execution_count": 23,
379
  "metadata": {},
380
  "outputs": [
381
  {
 
403
  " 'avg_length': 28.0}"
404
  ]
405
  },
406
+ "execution_count": 23,
407
  "metadata": {},
408
  "output_type": "execute_result"
409
  }
 
417
  ")"
418
  ]
419
  },
420
+ {
421
+ "cell_type": "code",
422
+ "execution_count": 28,
423
+ "metadata": {},
424
+ "outputs": [
425
+ {
426
+ "name": "stdout",
427
+ "output_type": "stream",
428
+ "text": [
429
+ "logits: tensor([[-4.2804, 4.6454]])\n",
430
+ "P(Human): 0.00013289612\n",
431
+ "P(AI): 0.9998671\n",
432
+ "Label: AI written\n",
433
+ "POSITIVE\n",
434
+ "arr= [41, 38, 47]\n",
435
+ "variance: 14.0\n",
436
+ "std: 3.7416573867739413\n",
437
+ "average length: 42.0\n"
438
+ ]
439
+ },
440
+ {
441
+ "data": {
442
+ "text/plain": [
443
+ "{'p_human': 0.00013289612,\n",
444
+ " 'p_ai': 0.9998671,\n",
445
+ " 'label': 'AI written',\n",
446
+ " 'variance': 14.0,\n",
447
+ " 'avg_length': 42.0}"
448
+ ]
449
+ },
450
+ "execution_count": 28,
451
+ "metadata": {},
452
+ "output_type": "execute_result"
453
+ }
454
+ ],
455
+ "source": [
456
+ "# Human\n",
457
+ "complete_sentence_analysis(\n",
458
+ " '''Araku Valley the “Queen of Hills” is a picturesque hill station located in Andhra Pradesh, better known for its natural beauty, lush green landscapes, coffee plantations, and pleasant climate. It is situated in the Eastern Ghats mountain range and is popular with tourists, especially people seeking a peaceful getaway from nearby coastal cities like Visakhapatnam which is hot and humid.\n",
459
+ "\n",
460
+ "Scenic Paradise in the Eastern Ghats\n",
461
+ "The picturesque hill station, renowned for its lush greenery, tribal heritage, and coffee plantations, beckons travelers to experience a rejuvenating escape from the hustle and bustle of urban life.'''\n",
462
+ ")"
463
+ ]
464
+ },
465
+ {
466
+ "cell_type": "code",
467
+ "execution_count": 29,
468
+ "metadata": {},
469
+ "outputs": [
470
+ {
471
+ "name": "stdout",
472
+ "output_type": "stream",
473
+ "text": [
474
+ "logits: tensor([[ 2.8461, -2.8003]])\n",
475
+ "P(Human): 0.99648213\n",
476
+ "P(AI): 0.0035178645\n",
477
+ "Label: Human Written\n",
478
+ "NEGATIVE\n",
479
+ "arr= [24, 39, 28, 28, 19]\n",
480
+ "variance: 43.44\n",
481
+ "std: 6.590902821313632\n",
482
+ "average length: 27.6\n"
483
+ ]
484
+ },
485
+ {
486
+ "data": {
487
+ "text/plain": [
488
+ "{'p_human': 0.99648213,\n",
489
+ " 'p_ai': 0.0035178645,\n",
490
+ " 'label': 'Human Written',\n",
491
+ " 'variance': 43.44,\n",
492
+ " 'avg_length': 27.6}"
493
+ ]
494
+ },
495
+ "execution_count": 29,
496
+ "metadata": {},
497
+ "output_type": "execute_result"
498
+ }
499
+ ],
500
+ "source": [
501
+ "# Human\n",
502
+ "complete_sentence_analysis(\n",
503
+ " '''Ranbir Kapoor is all set to bring to your screen an action thriller film titled ‘Animal’. It has been making news for quite some time now for its stellar cast which includes names like Anil Kapoor, Rashmika Mandana, Bobby Deol, and Tripti Dimri. It is set to be directed by Sandeep Reddy Vanga who is known for films like Arjun Reddy and Kabir Singh. The makers of the film recently dropped the teaser of the film where we saw Ranbir in a very ferocious avatar, ready to draw blood. The makers are now releasing a new song from the film tomorrow, check out the poster below…'''\n",
504
+ ")"
505
+ ]
506
+ },
507
+ {
508
+ "cell_type": "code",
509
+ "execution_count": 33,
510
+ "metadata": {},
511
+ "outputs": [
512
+ {
513
+ "name": "stdout",
514
+ "output_type": "stream",
515
+ "text": [
516
+ "logits: tensor([[-5.2793, 5.4920]])\n",
517
+ "P(Human): 2.0992544e-05\n",
518
+ "P(AI): 0.999979\n",
519
+ "Label: AI written\n",
520
+ "POSITIVE\n",
521
+ "arr= [24, 9, 5, 23]\n",
522
+ "variance: 70.1875\n",
523
+ "std: 8.37779804005802\n",
524
+ "average length: 15.25\n"
525
+ ]
526
+ },
527
+ {
528
+ "data": {
529
+ "text/plain": [
530
+ "{'p_human': 2.0992544e-05,\n",
531
+ " 'p_ai': 0.999979,\n",
532
+ " 'label': 'AI written',\n",
533
+ " 'variance': 70.1875,\n",
534
+ " 'avg_length': 15.25}"
535
+ ]
536
+ },
537
+ "execution_count": 33,
538
+ "metadata": {},
539
+ "output_type": "execute_result"
540
+ }
541
+ ],
542
+ "source": [
543
+ "complete_sentence_analysis(\n",
544
+ " '''Overall, it is a wonderful destination for nature lovers, adventurers, and anyone looking to experience a serene and tranquil environment.\n",
545
+ "\n",
546
+ "Araku Valley attractions catering to diverse interests:\n",
547
+ "Borra Caves: These ancient limestone caves, adorned with stunning stalactites and stalagmites, provide an enchanting underground experience.'''\n",
548
+ ")"
549
+ ]
550
+ },
551
+ {
552
+ "cell_type": "code",
553
+ "execution_count": 34,
554
+ "metadata": {},
555
+ "outputs": [
556
+ {
557
+ "name": "stdout",
558
+ "output_type": "stream",
559
+ "text": [
560
+ "logits: tensor([[-4.6586, 4.9331]])\n",
561
+ "P(Human): 6.828416e-05\n",
562
+ "P(AI): 0.9999317\n",
563
+ "Label: AI written\n",
564
+ "POSITIVE\n",
565
+ "arr= [27, 11, 1, 8, 23, 10, 28]\n",
566
+ "variance: 94.53061224489797\n",
567
+ "std: 9.722685444099175\n",
568
+ "average length: 15.428571428571429\n"
569
+ ]
570
+ },
571
+ {
572
+ "data": {
573
+ "text/plain": [
574
+ "{'p_human': 6.828416e-05,\n",
575
+ " 'p_ai': 0.9999317,\n",
576
+ " 'label': 'AI written',\n",
577
+ " 'variance': 94.53061224489797,\n",
578
+ " 'avg_length': 15.428571428571429}"
579
+ ]
580
+ },
581
+ "execution_count": 34,
582
+ "metadata": {},
583
+ "output_type": "execute_result"
584
+ }
585
+ ],
586
+ "source": [
587
+ "complete_sentence_analysis(\n",
588
+ " '''Hinton's career in AI began as an attempt to simulate a neural network on a computer, inspired by his fascination with the human brain. However, his early endeavors faced skepticism, and his Ph.D. advisor advised him to abandon the pursuit. Nevertheless, Hinton persisted in his quest to understand the human mind, ultimately leading to the development of artificial neural networks.\n",
589
+ "\n",
590
+ "\"It took much, much longer than I expected. It took, like, 50 years before it worked well, but in the end, it did work well,\" Hinton reflected on the journey.'''\n",
591
+ "\n",
592
+ "# In 2019, Geoffrey Hinton, along with collaborators Yann Lecun and Yoshua Bengio, received the Turing Award, often described as the Nobel Prize of computing, for their pioneering work on artificial neural networks. Their innovations have played a pivotal role in enabling machines to \"learn to learn.\"\n",
593
+ "\n",
594
+ "# During the interview, CBS News took viewers inside Google's AI lab in London, where robots were showcased as an example of machine learning in action. Notably, these robots were not explicitly programmed to play soccer; they were instructed to score goals and had to learn the game on their own through trial and error, a testament to the power of AI.'''\n",
595
+ ")"
596
+ ]
597
+ },
598
+ {
599
+ "cell_type": "code",
600
+ "execution_count": 42,
601
+ "metadata": {},
602
+ "outputs": [
603
+ {
604
+ "name": "stdout",
605
+ "output_type": "stream",
606
+ "text": [
607
+ "logits: tensor([[ 8.0981, -7.5304]])\n",
608
+ "P(Human): 0.9999999\n",
609
+ "P(AI): 1.6315956e-07\n",
610
+ "Label: Human Written\n",
611
+ "NEGATIVE\n",
612
+ "arr= [14, 40, 30, 31, 15, 15, 16, 13]\n",
613
+ "variance: 93.4375\n",
614
+ "std: 9.666307464590602\n",
615
+ "average length: 21.75\n"
616
+ ]
617
+ },
618
+ {
619
+ "data": {
620
+ "text/plain": [
621
+ "{'p_human': 0.9999999,\n",
622
+ " 'p_ai': 1.6315956e-07,\n",
623
+ " 'label': 'Human Written',\n",
624
+ " 'variance': 93.4375,\n",
625
+ " 'avg_length': 21.75}"
626
+ ]
627
+ },
628
+ "execution_count": 42,
629
+ "metadata": {},
630
+ "output_type": "execute_result"
631
+ }
632
+ ],
633
+ "source": [
634
+ "complete_sentence_analysis(\n",
635
+ " '''\n",
636
+ "2015 was a year when I looked at this blog with some professional seriousness. All these years it has been a platform for my expression, for sharing the joys of traveling, connecting with fellow travelers and learning from them about new destinations or the new way to look at old destinations. It has been a place where I experienced magic like with the story of a Bihar boy or when unknown people wrote to me about how their lives were impacted.\n",
637
+ "\n",
638
+ "However, in the last couple of years, I saw a commercial interest in the blog and travel blogging emerging as a career option that many youngsters took up voluntarily. So this year, I decided to open IndiTales for commercial associations. I did a couple of small campaigns with some leading brands in the travel domain. I was invited by many tourism boards and hospitality brands to visit their destinations and properties. So, a decent beginning and hope to take it forward in 2016.\n",
639
+ "\n",
640
+ "''')"
641
+ ]
642
+ },
643
+ {
644
+ "cell_type": "code",
645
+ "execution_count": 45,
646
+ "metadata": {},
647
+ "outputs": [
648
+ {
649
+ "name": "stdout",
650
+ "output_type": "stream",
651
+ "text": [
652
+ "logits: tensor([[ 2.8461, -2.8003]])\n",
653
+ "P(Human): 0.99648213\n",
654
+ "P(AI): 0.0035178645\n",
655
+ "Label: Human Written\n",
656
+ "NEGATIVE\n",
657
+ "arr= [24, 39, 28, 28, 19]\n",
658
+ "variance: 43.44\n",
659
+ "std: 6.590902821313632\n",
660
+ "average length: 27.6\n"
661
+ ]
662
+ },
663
+ {
664
+ "data": {
665
+ "text/plain": [
666
+ "{'p_human': 0.99648213,\n",
667
+ " 'p_ai': 0.0035178645,\n",
668
+ " 'label': 'Human Written',\n",
669
+ " 'variance': 43.44,\n",
670
+ " 'avg_length': 27.6}"
671
+ ]
672
+ },
673
+ "execution_count": 45,
674
+ "metadata": {},
675
+ "output_type": "execute_result"
676
+ }
677
+ ],
678
+ "source": [
679
+ "complete_sentence_analysis(\n",
680
+ " '''Ranbir Kapoor is all set to bring to your screen an action thriller film titled ‘Animal’. It has been making news for quite some time now for its stellar cast which includes names like Anil Kapoor, Rashmika Mandana, Bobby Deol, and Tripti Dimri. It is set to be directed by Sandeep Reddy Vanga who is known for films like Arjun Reddy and Kabir Singh. The makers of the film recently dropped the teaser of the film where we saw Ranbir in a very ferocious avatar, ready to draw blood. The makers are now releasing a new song from the film tomorrow, check out the poster below…'''\n",
681
+ ")"
682
+ ]
683
+ },
684
+ {
685
+ "cell_type": "code",
686
+ "execution_count": 48,
687
+ "metadata": {},
688
+ "outputs": [
689
+ {
690
+ "name": "stdout",
691
+ "output_type": "stream",
692
+ "text": [
693
+ "logits: tensor([[ 8.1121, -7.5430]])\n",
694
+ "P(Human): 0.9999999\n",
695
+ "P(AI): 1.5887993e-07\n",
696
+ "Label: Human Written\n",
697
+ "NEGATIVE\n",
698
+ "arr= [13, 30, 37, 24, 26, 20, 24, 22]\n",
699
+ "variance: 43.5\n",
700
+ "std: 6.59545297913646\n",
701
+ "average length: 24.5\n"
702
+ ]
703
+ },
704
+ {
705
+ "data": {
706
+ "text/plain": [
707
+ "{'p_human': 0.9999999,\n",
708
+ " 'p_ai': 1.5887993e-07,\n",
709
+ " 'label': 'Human Written',\n",
710
+ " 'variance': 43.5,\n",
711
+ " 'avg_length': 24.5}"
712
+ ]
713
+ },
714
+ "execution_count": 48,
715
+ "metadata": {},
716
+ "output_type": "execute_result"
717
+ }
718
+ ],
719
+ "source": [
720
+ "complete_sentence_analysis(\n",
721
+ " # '''Data, documents, and processes may be stored across teams and tools. By linking relevant pieces of data, you can build systems that can recommend people to projects, connect related projects, or centralize access to avoid duplicate efforts. You can extract entities from text-heavy content such as emails, word documents, PDF, and spreadsheets or meta-data from video, audio, and photos to build a knowledge graph. You can augment this knowledge graph with structured data from CRM and ERP systems to get a comprehensive view about a product.'''\n",
722
+ "'''Data, documents, and processes may be stored across teams and tools. By linking relevant pieces of data, you can build systems that can recommend people to projects, connect related projects, or centralize access to avoid duplicate efforts. You can extract entities from text-heavy content such as emails, word documents, PDF, and spreadsheets or meta-data from video, audio, and photos to build a knowledge graph. You can augment this knowledge graph with structured data from CRM and ERP systems to get a comprehensive view about a product.\n",
723
+ "In manufacturing, you can track the different stages building and delivering a product from changes to inventory levels to store shipments using a knowledge graph. In life sciences, you can use a knowledge graph to track an experiment, trails and characteristics of drugs. In financial services, you can build a knowledge graph for the holding company of a security, the security and the beneficial holding. You can augment this graph with social media, industry events to record the relations to provide insights into dependencies between firms.'''\n",
724
+ "\n",
725
+ "\n",
726
+ ")"
727
+ ]
728
+ },
729
+ {
730
+ "cell_type": "code",
731
+ "execution_count": 50,
732
+ "metadata": {},
733
+ "outputs": [
734
+ {
735
+ "data": {
736
+ "text/plain": [
737
+ "'/home/ubuntu'"
738
+ ]
739
+ },
740
+ "execution_count": 50,
741
+ "metadata": {},
742
+ "output_type": "execute_result"
743
+ }
744
+ ],
745
+ "source": []
746
+ },
747
+ {
748
+ "cell_type": "code",
749
+ "execution_count": 51,
750
+ "metadata": {},
751
+ "outputs": [],
752
+ "source": [
753
+ "# 1 \n",
754
+ "# >3 \n",
755
+ "\n",
756
+ "\n",
757
+ "import pandas as pd \n",
758
+ "\n",
759
+ "\n",
760
+ "df= pd.read_csv(\n",
761
+ " '/home/ubuntu/SentenceStructureComparision/data/AI_checker_gpt3_remade.csv'\n",
762
+ ")"
763
+ ]
764
+ },
765
+ {
766
+ "cell_type": "code",
767
+ "execution_count": 55,
768
+ "metadata": {},
769
+ "outputs": [
770
+ {
771
+ "data": {
772
+ "text/plain": [
773
+ "['Syd Rapson (born Sydney Rapson, 28 April 1932 – 28 April 2017) was an English Labour Party politician who served as Member of Parliament (MP) for Portsmouth South from 1997 to 2010.',\n",
774
+ " '',\n",
775
+ " 'Rapson was born in Portsmouth and educated at Portsmouth Grammar School and the University of Southampton. He worked as a teacher and a lecturer before entering politics. He was a councillor on Portsmouth City Council from 1973 to 1997, and was leader of the council from 1983 to 1997.',\n",
776
+ " '',\n",
777
+ " 'Rapson was elected to the House of Commons at the 1997 general election, and held the seat until he stood down at the 2010 general election. He was a member of the Defence Select Committee from 1997 to 2010, and was a member of the Parliamentary Labour Party Defence Committee from 1997 to 2005. He was also a member of the Parliamentary Labour Party Home Affairs Committee from 2005 to 2010.']"
778
+ ]
779
+ },
780
+ "execution_count": 55,
781
+ "metadata": {},
782
+ "output_type": "execute_result"
783
+ }
784
+ ],
785
+ "source": [
786
+ "# df.head(5).values[0][0].split(\"\\n\")"
787
+ ]
788
+ },
789
+ {
790
+ "cell_type": "code",
791
+ "execution_count": 60,
792
+ "metadata": {},
793
+ "outputs": [
794
+ {
795
+ "data": {
796
+ "text/plain": [
797
+ "['Rick Mahler (born Richard Alan Mahler on April 18, 1957 in Atlanta, Georgia) was an American professional baseball pitcher who played for the Atlanta Braves, Cincinnati Reds, Montreal Expos, and Minnesota Twins of Major League Baseball (MLB) from 1978 to 1990. He was a two-time All-Star and won the National League (NL) ERA title in 1985.',\n",
798
+ " '',\n",
799
+ " \"Mahler was drafted by the Braves in the first round of the 1975 amateur draft. He made his major league debut with the Braves in 1978, and was a member of the team's starting rotation for the next five seasons. He was an All-Star in 1983 and 1985, and won the NL ERA title in 1985 with a 2.87 ERA. He was traded to the Reds in 1984, and then to the Expos in 1986. He was traded to the Twins in 1989, and finished his career with them in 1990.\",\n",
800
+ " '',\n",
801
+ " 'Mahler was known for his durability, as he pitched over 200 innings in six consecutive seasons from 1983 to 1988. He finished his career with a record of 119–121, a 3.90 ERA, and 1,07']"
802
+ ]
803
+ },
804
+ "execution_count": 60,
805
+ "metadata": {},
806
+ "output_type": "execute_result"
807
+ }
808
+ ],
809
+ "source": [
810
+ "# df.head(5).values[4][0].split(\"\\n\")\n"
811
+ ]
812
+ },
813
  {
814
  "cell_type": "code",
815
  "execution_count": null,
utils/get_sentence_status.py CHANGED
@@ -9,7 +9,7 @@ tokenizer = AutoTokenizer.from_pretrained("gpt3_finetuned_model/checkpoint-30048
9
  tokenizer_v2 = AutoTokenizer.from_pretrained("gpt2-large")
10
 
11
 
12
- model = AutoModelForSequenceClassification.from_pretrained("gpt3_finetuned_model/checkpoint-30048")
13
 
14
 
15
  def split_sentence(sentence:str):
@@ -30,7 +30,7 @@ def predict(sentence: str):
30
  '''
31
  Returns (probability_human, probability_AI, label)
32
  '''
33
- inputs = tokenizer(sentence, return_tensors="pt")
34
  with torch.no_grad():
35
  logits = model(**inputs).logits
36
 
 
9
  tokenizer_v2 = AutoTokenizer.from_pretrained("gpt2-large")
10
 
11
 
12
+ model = AutoModelForSequenceClassification.from_pretrained("gpt3_finetuned_model/checkpoint-30048").to("cuda")
13
 
14
 
15
  def split_sentence(sentence:str):
 
30
  '''
31
  Returns (probability_human, probability_AI, label)
32
  '''
33
+ inputs = tokenizer(sentence, return_tensors="pt").to("cuda")
34
  with torch.no_grad():
35
  logits = model(**inputs).logits
36