{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T11:21:48.184387Z","iopub.status.busy":"2024-05-20T11:21:48.183665Z","iopub.status.idle":"2024-05-20T11:24:45.446401Z","shell.execute_reply":"2024-05-20T11:24:45.444922Z","shell.execute_reply.started":"2024-05-20T11:21:48.184353Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Collecting transformers==4.33.0\n"," Using cached transformers-4.33.0-py3-none-any.whl.metadata (119 kB)\n","Collecting accelerate==0.22.0\n"," Using cached accelerate-0.22.0-py3-none-any.whl.metadata (17 kB)\n","Collecting einops==0.6.1\n"," Using cached einops-0.6.1-py3-none-any.whl.metadata (12 kB)\n","Collecting langchain==0.0.300\n"," Using cached langchain-0.0.300-py3-none-any.whl.metadata (15 kB)\n","Collecting xformers==0.0.21\n"," Using cached xformers-0.0.21.tar.gz (22.3 MB)\n"]},{"name":"stderr","output_type":"stream","text":["ERROR: Could not install packages due to an OSError: [Errno 2] No such file or directory: 'C:\\\\Users\\\\siddi\\\\AppData\\\\Local\\\\Temp\\\\pip-install-cyo5pvb9\\\\xformers_0b8bc7bb638a4071b04db9b2990bef40\\\\third_party/flash-attention/csrc/cutlass/docs/classcutlass_1_1epilogue_1_1thread_1_1LinearCombinationRelu_3_01ElementOutput___00_01Count_00_014d4e40c4295be6a8d8778d86e94fe14a.html'\n","HINT: This error might have occurred since this system does not have Windows Long Path support enabled. 
You can find information on how to enable this at https://pip.pypa.io/warnings/enable-long-paths\n","\n"]}],"source":["!pip install transformers==4.33.0 accelerate==0.22.0 einops==0.6.1 langchain==0.0.300 xformers==0.0.21 \\\n","bitsandbytes==0.41.1 sentence_transformers==2.2.2 chromadb==0.4.12"]},{"cell_type":"code","execution_count":2,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T11:25:39.463684Z","iopub.status.busy":"2024-05-20T11:25:39.462969Z","iopub.status.idle":"2024-05-20T11:25:47.108271Z","shell.execute_reply":"2024-05-20T11:25:47.107327Z","shell.execute_reply.started":"2024-05-20T11:25:39.463651Z"},"trusted":true},"outputs":[],"source":["from torch import cuda, bfloat16\n","import torch\n","import transformers\n","from transformers import AutoTokenizer\n","from time import time\n","#import chromadb\n","#from chromadb.config import Settings\n","from langchain.llms import HuggingFacePipeline\n","from langchain.document_loaders import TextLoader\n","from langchain.text_splitter import RecursiveCharacterTextSplitter\n","from langchain.embeddings import HuggingFaceEmbeddings\n","from langchain.chains import RetrievalQA\n","from langchain.vectorstores import Chroma\n","from langchain import PromptTemplate, LLMChain"]},{"cell_type":"code","execution_count":3,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T11:25:48.508728Z","iopub.status.busy":"2024-05-20T11:25:48.507725Z","iopub.status.idle":"2024-05-20T11:26:01.796161Z","shell.execute_reply":"2024-05-20T11:26:01.794844Z","shell.execute_reply.started":"2024-05-20T11:25:48.508689Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Note: you may need to restart the kernel to use updated packages.\n"]}],"source":["pip install -q accelerate 
bitsandbytes\n"]},{"cell_type":"code","execution_count":4,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T11:26:04.152407Z","iopub.status.busy":"2024-05-20T11:26:04.152021Z","iopub.status.idle":"2024-05-20T11:26:04.201802Z","shell.execute_reply":"2024-05-20T11:26:04.200747Z","shell.execute_reply.started":"2024-05-20T11:26:04.152376Z"},"trusted":true},"outputs":[],"source":["# Location of the model files; the path names the Llama-2 7b-chat-hf checkpoint.\n","model_id = '/kaggle/input/llama-2/pytorch/7b-chat-hf/1'\n","\n","# Run on the current CUDA device when CUDA is available, otherwise on the CPU.\n","if cuda.is_available():\n","    device = f'cuda:{cuda.current_device()}'\n","else:\n","    device = 'cpu'\n","\n","# Quantization settings that let the large model load with less GPU memory\n","# (this depends on the `bitsandbytes` library): 4-bit loading, 'nf4' quant type,\n","# double quantization enabled, bfloat16 as the compute dtype.\n","bnb_config = transformers.BitsAndBytesConfig(\n","    bnb_4bit_compute_dtype=bfloat16,\n","    bnb_4bit_use_double_quant=True,\n","    bnb_4bit_quant_type='nf4',\n","    load_in_4bit=True,\n",")"]},{"cell_type":"code","execution_count":5,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T11:26:09.157715Z","iopub.status.busy":"2024-05-20T11:26:09.156819Z","iopub.status.idle":"2024-05-20T11:29:10.539621Z","shell.execute_reply":"2024-05-20T11:29:10.538631Z","shell.execute_reply.started":"2024-05-20T11:26:09.157684Z"},"trusted":true},"outputs":[{"name":"stderr","output_type":"stream","text":["2024-05-20 11:26:12.744027: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-05-20 11:26:12.744143: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-05-20 11:26:12.880595: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been 
registered\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"e3757c76baca4aa4a33b67066b3a9607","version_major":2,"version_minor":0},"text/plain":["Loading checkpoint shards: 0%| | 0/2 [00:00=2.0.0 in /opt/conda/lib/python3.10/site-packages (from pdfminer.six) (3.3.2)\n","Requirement already satisfied: cryptography>=36.0.0 in /opt/conda/lib/python3.10/site-packages (from pdfminer.six) (41.0.7)\n","Requirement already satisfied: cffi>=1.12 in /opt/conda/lib/python3.10/site-packages (from cryptography>=36.0.0->pdfminer.six) (1.16.0)\n","Requirement already satisfied: pycparser in /opt/conda/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six) (2.21)\n","Downloading pdfminer.six-20231228-py3-none-any.whl (5.6 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m49.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n","\u001b[?25hInstalling collected packages: pdfminer.six\n","Successfully installed pdfminer.six-20231228\n","Note: you may need to restart the kernel to use updated packages.\n"]}],"source":["pip install pdfminer.six"]},{"cell_type":"code","execution_count":12,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T12:01:17.301998Z","iopub.status.busy":"2024-05-20T12:01:17.301573Z","iopub.status.idle":"2024-05-20T12:01:31.132363Z","shell.execute_reply":"2024-05-20T12:01:31.131008Z","shell.execute_reply.started":"2024-05-20T12:01:17.301961Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n","To disable this warning, you can either:\n","\t- Avoid using `tokenizers` before the fork if possible\n","\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","Collecting PyPdf2\n"," Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)\n","Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m232.6/232.6 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n","\u001b[?25hInstalling collected packages: PyPdf2\n","Successfully installed PyPdf2-3.0.1\n"]}],"source":["# Pinned to the version shown in this cell's recorded output; %pip installs into the kernel's environment.\n","%pip install PyPDF2==3.0.1"]},{"cell_type":"code","execution_count":13,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T12:01:36.180129Z","iopub.status.busy":"2024-05-20T12:01:36.179145Z","iopub.status.idle":"2024-05-20T12:01:36.364110Z","shell.execute_reply":"2024-05-20T12:01:36.363199Z","shell.execute_reply.started":"2024-05-20T12:01:36.180092Z"},"trusted":true},"outputs":[],"source":["from PyPDF2 import PdfReader\n","\n","# Open the source PDF from the attached Kaggle input directory.\n","pdfreader = PdfReader('/kaggle/input/ai-dataset/ai.pdf')"]},{"cell_type":"code","execution_count":14,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T12:01:41.112256Z","iopub.status.busy":"2024-05-20T12:01:41.111345Z","iopub.status.idle":"2024-05-20T12:01:41.390940Z","shell.execute_reply":"2024-05-20T12:01:41.389877Z","shell.execute_reply.started":"2024-05-20T12:01:41.112221Z"},"trusted":true},"outputs":[],"source":["from typing_extensions import Concatenate  # NOTE(review): not used in the visible cells - confirm and remove\n","\n","# Gather the text of each page; pages where extract_text() returns nothing are skipped.\n","page_texts = []\n","for page in pdfreader.pages:\n","    content = page.extract_text()\n","    if content:\n","        page_texts.append(content)\n","\n","# Join with a newline so the end of one page is not fused with the start of the next.\n","text = \"\\n\".join(page_texts)"]},
{"cell_type":"code","execution_count":15,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T12:01:45.081869Z","iopub.status.busy":"2024-05-20T12:01:45.081446Z","iopub.status.idle":"2024-05-20T12:01:45.086668Z","shell.execute_reply":"2024-05-20T12:01:45.085563Z","shell.execute_reply.started":"2024-05-20T12:01:45.081823Z"},"trusted":true},"outputs":[],"source":["from langchain.docstore.document import Document"]},{"cell_type":"code","execution_count":16,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T12:01:49.722451Z","iopub.status.busy":"2024-05-20T12:01:49.722053Z","iopub.status.idle":"2024-05-20T12:01:49.727910Z","shell.execute_reply":"2024-05-20T12:01:49.726775Z","shell.execute_reply.started":"2024-05-20T12:01:49.722417Z"},"trusted":true},"outputs":[],"source":["# Wrap the whole extracted text in a single langchain Document.\n","docs = [Document(page_content=text)]"]},{"cell_type":"code","execution_count":17,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T12:01:52.763686Z","iopub.status.busy":"2024-05-20T12:01:52.762590Z","iopub.status.idle":"2024-05-20T12:01:52.770967Z","shell.execute_reply":"2024-05-20T12:01:52.769916Z","shell.execute_reply.started":"2024-05-20T12:01:52.763639Z"},"trusted":true},"outputs":[],"source":["# Split the extracted text into chunks (chunk_size=1000, chunk_overlap=20).\n","text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)\n","chunks = 
text_splitter.create_documents([text])"]},{"cell_type":"code","execution_count":18,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T12:01:57.337216Z","iopub.status.busy":"2024-05-20T12:01:57.336430Z","iopub.status.idle":"2024-05-20T12:01:57.344190Z","shell.execute_reply":"2024-05-20T12:01:57.343219Z","shell.execute_reply.started":"2024-05-20T12:01:57.337185Z"},"trusted":true},"outputs":[{"data":{"text/plain":["40"]},"execution_count":18,"metadata":{},"output_type":"execute_result"}],"source":["len(chunks)"]},{"cell_type":"markdown","metadata":{"execution":{"iopub.execute_input":"2024-05-19T07:59:54.586537Z","iopub.status.busy":"2024-05-19T07:59:54.586203Z","iopub.status.idle":"2024-05-19T07:59:54.592012Z","shell.execute_reply":"2024-05-19T07:59:54.590994Z","shell.execute_reply.started":"2024-05-19T07:59:54.586512Z"}},"source":["This snippet lives in a markdown cell, so it is not executed:\n","\n","```python\n","loader = TextLoader(\"/kaggle/input/testing-dataset/thyroid.txt\",\n","                    encoding=\"utf8\")\n","documents = loader.load()\n","```"]},{"cell_type":"code","execution_count":19,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T12:02:07.752498Z","iopub.status.busy":"2024-05-20T12:02:07.751596Z","iopub.status.idle":"2024-05-20T12:02:15.517927Z","shell.execute_reply":"2024-05-20T12:02:15.516798Z","shell.execute_reply.started":"2024-05-20T12:02:07.752464Z"},"trusted":true},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"9732b0cdc83049f0b91347dd5b95b2ec","version_major":2,"version_minor":0},"text/plain":[".gitattributes: 0%| | 0.00/1.23k [00:00 Entering new RetrievalQA chain...\u001b[0m\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"b8ba6970fc6444f5abe133dbeb56b189","version_major":2,"version_minor":0},"text/plain":["Batches: 0%| | 0/1 [00:00 Finished chain.\u001b[0m\n","Inference time: 19.592 sec.\n","\n","Result: Sure, here is a summary of the text in 300 words or less:\n","\n","The text discusses the history and evolution of artificial intelligence (AI) and its impact on society. 
AI can be classified into different types, such as analytical, human-inspired, and humanized AI, depending on its cognitive, emotional, and social intelligence. The author argues that when AI reaches mainstream usage, it is frequently no longer considered as such, and this phenomenon is known as the AI effect. The text also mentions that advances in AI have led to the development of image recognition, smart speakers, and self-driving cars, and it has entered the business environment and public conversation. The author concludes that AI has the potential to revolutionize many aspects of our lives, but it also raises important ethical and societal questions that need to be addressed.\n"]}],"source":["test_rag(qa, query=\"Can you summarize this long piece of text into a concise summary in not more than 300 words?\", \n"," num_outputs=1, max_length=300, min_length=30, do_sample=False)"]},{"cell_type":"code","execution_count":25,"metadata":{"execution":{"iopub.execute_input":"2024-05-20T12:03:08.232387Z","iopub.status.busy":"2024-05-20T12:03:08.231964Z","iopub.status.idle":"2024-05-20T12:03:43.576910Z","shell.execute_reply":"2024-05-20T12:03:43.575941Z","shell.execute_reply.started":"2024-05-20T12:03:08.232353Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Query: Can you summarize this long piece of text into a concise summary in not more than 500 words?\n","\n","\n","\n","\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"b38405c7565443d19d86bdad156b6747","version_major":2,"version_minor":0},"text/plain":["Batches: 0%| | 0/1 [00:00 Finished chain.\u001b[0m\n","Inference time: 35.34 sec.\n","\n","Result: Of course! Here is a summary of the text in 475 words:\n","\n","The article discusses the history and future of artificial intelligence (AI). The author divides AI's history into four seasons: spring, summer, fall, and winter. 
Spring represents the birth of AI, summer its growth and development, fall its challenges and limitations, and winter its potential future advancements.\n","\n","In the spring season, AI was born out of the need to automate tasks and solve complex problems. The first AI program was developed in the 1950s, and since then, AI has evolved rapidly, with the development of new algorithms, models, and techniques. Summer represents the growth and development of AI, where it became more sophisticated and capable of performing a wide range of tasks.\n","\n","In the fall season, AI faced challenges and limitations, such as the lack of data, the difficulty of interpreting results, and the need for better integration with human systems. Despite these challenges, AI continued to advance, with the development of new technologies such as deep learning and natural language processing. Winter represents the potential future advancements of AI, where it is expected to reach new heights, such as the development of Artificial General Intelligence (AGI) and Superintelligence.\n","\n","The author notes that the AI effect occurs when onlookers discount the behavior of an AI program, arguing that it is not real intelligence. 
However, as the British science fiction writer Arthur Clarke once said, \"Any sufficiently advanced technology is indistinguishable from magic.\" The author concludes by emphasizing the importance of understanding AI's past, present, and future to prepare for the challenges ahead.\n"]}],"source":["test_rag(qa, query=\"Can you summarize this long piece of text into a concise summary in not more than 500 words?\", \n"," num_outputs=1, max_length=500, min_length=30, do_sample=False)"]}],"metadata":{"kaggle":{"accelerator":"gpu","dataSources":[{"datasetId":1332934,"sourceId":2219619,"sourceType":"datasetVersion"},{"datasetId":4884056,"sourceId":8234785,"sourceType":"datasetVersion"},{"datasetId":4889139,"sourceId":8241861,"sourceType":"datasetVersion"},{"datasetId":4889150,"sourceId":8241874,"sourceType":"datasetVersion"},{"datasetId":4889169,"sourceId":8241895,"sourceType":"datasetVersion"},{"datasetId":4910415,"sourceId":8270647,"sourceType":"datasetVersion"},{"datasetId":5045202,"sourceId":8463148,"sourceType":"datasetVersion"},{"datasetId":5048074,"sourceId":8466953,"sourceType":"datasetVersion"},{"modelInstanceId":3093,"sourceId":4298,"sourceType":"modelInstanceVersion"},{"isSourceIdPinned":true,"modelInstanceId":1462,"sourceId":1728,"sourceType":"modelInstanceVersion"}],"dockerImageVersionId":30699,"isGpuEnabled":true,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.12.1"}},"nbformat":4,"nbformat_minor":4}