{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os; os.chdir('..')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "s= '''Ah, spring. It's our season of hope, a time when nature stirs from its winter slumber. The days lengthen, the frost surrenders its grip, and a world once dormant reawakens. The air, once frigid and crisp, transforms, carrying the gentle fragrance of blooming flowers and fresh grass that invigorates the senses. Spring, a time of magic, sees nature's dormant forces burst forth in a vivid spectacle of colors and life.\n", "\n", "The trees, once bare, now bud, and delicate green leaves unfurl, creating a lush canopy overhead. Cherry blossoms, daffodils, and tulips paint gardens and parks with their vibrant palettes, infusing the landscape with joy. Birds, returning from their long migrations, fill the air with their'''" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Ah, spring',\n", " \"It's our season of hope, a time when nature stirs from its winter slumber\",\n", " 'The days lengthen, the frost surrenders its grip, and a world once dormant reawakens',\n", " 'The air, once frigid and crisp, transforms, carrying the gentle fragrance of blooming flowers and fresh grass that invigorates the senses',\n", " \"Spring, a time of magic, sees nature's dormant forces burst forth in a vivid spectacle of colors and life\",\n", " 'The trees, once bare, now bud, and delicate green leaves unfurl, creating a lush canopy overhead',\n", " 'Cherry blossoms, daffodils, and tulips paint gardens and parks with their vibrant palettes, infusing the landscape with joy',\n", " 'Birds, returning from their long migrations, fill the air with their']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import re\n", "\n", "def split_sentence(sentence:str):\n", " # Create a regular expression pattern from the list of separators\n", " sentence= sentence.replace('\\n', '')\n", " separators = ['. ', '.', ':']\n", " \n", " pattern = '|'.join(map(re.escape, separators))\n", "\n", " # Split the sentence using the pattern as a delimiter\n", " parts = re.split(pattern, sentence)\n", "\n", " return parts\n", "\n", "split_sentence(s)\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from transformers import AutoTokenizer\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(\"gpt3_finetuned_model/checkpoint-30048\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "tokenizer = AutoTokenizer.from_pretrained(\"gpt2-large\")\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "def calculate_burst(list_of_sentences):\n", " arr= []\n", " for i in list_of_sentences:\n", " ei= tokenizer(i, return_tensors=\"pt\")\n", " arr.append(ei.input_ids.size(1))\n", " print(f\"arr= {(arr)}\")\n", " print(f'variance: {np.var(np.array(arr))}')\n", " print(f'std: {np.std(np.array(arr))}')\n", " print(f'average length: {np.average(np.array(arr))}')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Ah, spring',\n", " \"It's our season of hope, a time when nature stirs from its winter slumber\",\n", " 'The days lengthen, the frost surrenders its grip, and a world once dormant reawakens',\n", " 'The air, once frigid and crisp, transforms, carrying the gentle fragrance of blooming flowers and fresh grass that invigorates the senses',\n", " \"Spring, a time of magic, sees nature's dormant forces burst forth in a vivid spectacle of colors and life\",\n", " 'The trees, once bare, now bud, and delicate green leaves unfurl, creating a lush canopy overhead',\n", " 'Cherry blossoms, daffodils, and tulips paint gardens and parks with their vibrant palettes, infusing the landscape with joy',\n", " 'Birds, returning from their long migrations, fill the air with their']" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list_of_sentences= split_sentence(s)\n", "list_of_sentences" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "arr= [3, 18, 21, 28, 22, 21, 29, 15]\n", "variance: 58.484375\n", "std: 7.647507763971214\n", "average length: 19.625\n" ] } ], "source": [ "calculate_burst(list_of_sentences)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Model Prediction\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "from transformers import AutoTokenizer\n", "from transformers import AutoModelForSequenceClassification\n", "import torch\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(\"gpt3_finetuned_model/checkpoint-30048\")\n", "\n", "model = AutoModelForSequenceClassification.from_pretrained(\"gpt3_finetuned_model/checkpoint-30048\")\n", "\n" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "from torch.nn import functional as F\n", "def predict(sentence):\n", " inputs = tokenizer(sentence, return_tensors=\"pt\")\n", " with torch.no_grad():\n", " logits = model(**inputs).logits\n", " \n", " print(\"logits: \", logits)\n", " predicted_class_id = logits.argmax().item()\n", " # get probabilities using softmax from logit score and convert it to numpy array\n", " probabilities_scores = F.softmax(logits, dim = -1).numpy()[0]\n", " print(\"P(Human): \", probabilities_scores[0])\n", " print(\"P(AI): \", probabilities_scores[1])\n", " label= \"Human Written\" if model.config.id2label[predicted_class_id]=='NEGATIVE' else 'AI written'\n", " print(\"Label: \", label)\n", " print(model.config.id2label[predicted_class_id])\n", " \n", "\n" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "logits: tensor([[-7.7618, 7.7867]])\n", "P(Human): 1.7674812e-07\n", "P(AI): 0.9999999\n", "Label: AI written\n", "POSITIVE\n" ] } ], "source": [ "predict('''The Flash (or simply Flash) is the name of several superheroes in the DC Comics universe. Each iteration of the character possesses superhuman speed, allowing them to move at incredible velocities, run on water, phase through solid objects, and even time travel. The most iconic Flash is Barry Allen, who first appeared in 1956 and became the Scarlet Speedster known for his distinctive red costume with a lightning bolt emblem.\n", "\n", "Barry Allen's origin story involves a lightning strike combined with a chemical accident, granting him his incredible speed powers. He adopts the superhero persona of The Flash to fight crime in Central City. His adventures often revolve around thwarting supervillains and metahuman threats, while also serving as a founding member of the Justice League.\n", "\n", "''')" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "logits: tensor([[ 8.0190, -7.4839]])\n", "P(Human): 0.99999976\n", "P(AI): 1.8500727e-07\n", "Label: Human Written\n", "NEGATIVE\n" ] } ], "source": [ "predict(\n", " '''The Flash first appeared in the Golden Age Flash Comics #1 (January 1940), from All-American Publications, one of three companies that would eventually merge to form DC Comics. Created by writer Gardner Fox and artist Harry Lampert, this Flash was Jay Garrick, a college student who gained his speed through the inhalation of hard water vapors. When re-introduced in the 1960s Garrick's origin was modified slightly, gaining his powers through exposure to heavy water.\n", "\n", "Jay Garrick was a popular character in the 1940s, supporting both Flash Comics and All-Flash Quarterly (later published bi-monthly as simply All-Flash); co-starring in Comic Cavalcade; and being a charter member of the Justice Society of America, the first superhero team, whose adventures ran in All Star Comics. With superheroes' post-war decline in popularity, Flash Comics was canceled with issue #104 (1949) which featured an evil version of the Flash called the Rival. The Justice Society's final Golden Age story ran in All Star Comics #57 (1951; the title itself continued as All Star Western).'''\n", ")" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "logits: tensor([[ 7.9124, -7.3888]])\n", "P(Human): 0.99999976\n", "P(AI): 2.2633e-07\n", "Label: Human Written\n", "NEGATIVE\n" ] } ], "source": [ "predict(\n", " '''Virat Kohli (Hindi pronunciation: [ʋɪˈɾɑːʈ ˈkoːɦli] ⓘ; born 5 November 1988) is an Indian international cricketer and the former captain of the Indian national cricket team who plays for Royal Challengers Bangalore in the IPL and Delhi in domestic cricket. Considered to be one of the best cricketers in the world, he is widely regarded as one of the greatest batsmen in the history of the sport.[4] Nicknamed \"The King\", due to his dominant style of play and popularity, Kohli holds numerous records in his career across all formats. In 2020, the International Cricket Council named him the male cricketer of the decade. Kohli has also contributed to India's successes, captaining the team from 2014 to 2022, and winning the 2011 World Cup and the 2013 Champions trophy. He is among the only four Indian cricketers who have played over 500 matches for India.[5]\n", "\n", "Born and raised in New Delhi, Kohli trained at the West Delhi Cricket Academy and started his youth career with the Delhi Under-15 team. He made his international debut in 2008 and quickly became a key player in the ODI team and later made his Test debut in 2011. In 2013, Kohli reached the number one spot in the ICC rankings for ODI batsmen for the first time. During 2014 T20 World Cup, he set a record for the most runs scored in the tournament. In 2018, he achieved yet another milestone, becoming the world's top-ranked Test batsman, making him the only Indian cricketer to hold the number one spot in all three formats of the game. His form continued in 2019, when he became the first player to score 20,000 international runs in a single decade. In 2021, Kohli made the decision to step down as the captain of the Indian national team for T20Is, following the T20 World Cup and in early 2022 he stepped down as the captain of the Test team as well.'''\n", ")" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "logits: tensor([[-8.4224, 8.2709]])\n", "P(Human): 5.6263374e-08\n", "P(AI): 1.0\n", "Label: AI written\n", "POSITIVE\n" ] } ], "source": [ "predict(\n", " '''Virat Kohli is an Indian cricketing sensation who has left an indelible mark on the world of sports. Born in Delhi, India, Kohli's journey from a young aspiring cricketer to becoming one of the greatest batsmen in the history of the game is nothing short of remarkable.\n", "\n", "Kohli's cricketing prowess was evident from a tender age, and he quickly rose through the ranks of junior cricket in India. He made his debut for the Indian national team in 2008, and since then, he has been a symbol of consistency and excellence. His distinctive blend of aggression and technical finesse at the crease has earned him a reputation as a modern-day batting maestro.'''\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 2 }