{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "2RRAOT1TyutY" }, "outputs": [], "source": [ "%%capture\n", "!pip install -q transformers einops accelerate langchain bitsandbytes" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "BqDz7Ye0zc5Z", "outputId": "1fc70718-a05b-4e6f-e37d-9ecd4581ee11" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n", " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n", " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n", "\n", " To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n", "Enter your token (input will not be visible): \n", "Add token as git credential? 
(Y/n) y\n", "Token is valid (permission: fineGrained).\n", "\u001b[1m\u001b[31mCannot authenticate through git-credential as no helper is defined on your machine.\n", "You might have to re-authenticate when pushing to the Hugging Face Hub.\n", "Run the following command in your terminal in case you want to set the 'store' credential helper as default.\n", "\n", "git config --global credential.helper store\n", "\n", "Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.\u001b[0m\n", "Token has not been saved to git credential helper.\n", "Your token has been saved to /root/.cache/huggingface/token\n", "Login successful\n" ] } ], "source": [ "!huggingface-cli login" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "7e3i_Makzm7H" }, "outputs": [], "source": [ "# -p makes this idempotent: no error when pdfs/ already exists on a re-run\n", "!mkdir -p pdfs" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "soOpcWEC0J33" }, "outputs": [], "source": [ "%%capture\n", "%pip install pypdf" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "w_CJngLD0wJI" }, "outputs": [], "source": [ "%%capture\n", "%pip install langchain-community" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "YbuzSMTO054U" }, "outputs": [], "source": [ "from langchain.document_loaders import PyPDFDirectoryLoader" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "JiHT3lr31vby" }, "outputs": [], "source": [ "# loads every PDF found under the pdfs/ directory\n", "loader=PyPDFDirectoryLoader(\"pdfs\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MVw1va833uPI", "outputId": "806a6393-b609-43c8-e5f2-00260c566b24" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:pypdf.generic._base:could not convert string to float: '0.00-10' : FloatObject (b'0.00-10') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-10' : FloatObject (b'0.00-10') invalid; use 0.0 instead\n", 
"WARNING:pypdf.generic._base:could not convert string to float: '0.00-10' : FloatObject (b'0.00-10') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-10' : FloatObject (b'0.00-10') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-30' : FloatObject (b'0.00-30') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-30' : FloatObject (b'0.00-30') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-30' : FloatObject (b'0.00-30') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-30' : FloatObject (b'0.00-30') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-20' : FloatObject (b'0.00-20') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-20' : FloatObject (b'0.00-20') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-20' : FloatObject (b'0.00-20') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-20' : FloatObject (b'0.00-20') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-20' : FloatObject (b'0.00-20') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-20' : FloatObject (b'0.00-20') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-20' : FloatObject (b'0.00-20') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-20' : FloatObject (b'0.00-20') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-50' : FloatObject (b'0.00-50') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert 
string to float: '0.00-50' : FloatObject (b'0.00-50') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-50' : FloatObject (b'0.00-50') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-50' : FloatObject (b'0.00-50') invalid; use 0.0 instead\n", "WARNING:pypdf.generic._base:could not convert string to float: '0.00-50' : FloatObject (b'0.00-50') invalid; use 0.0 instead\n" ] } ], "source": [ "data=loader.load()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "id": "JKY9YCmh3woY" }, "outputs": [], "source": [ "# we are going to create the chunks\n", "from langchain.text_splitter import RecursiveCharacterTextSplitter" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "id": "c5DqhJ-534PC" }, "outputs": [], "source": [ "text_chunks=RecursiveCharacterTextSplitter(chunk_size=300,chunk_overlap=20)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "id": "UGWFsLLP38AA" }, "outputs": [], "source": [ "chunks=text_chunks.split_documents(data)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wfl-v22k4AOZ", "outputId": "756e198f-94eb-49c1-e294-13abc65c5ecd" }, "outputs": [ { "data": { "text/plain": [ "235" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(chunks)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "id": "ml7vlxUW5NV6" }, "outputs": [], "source": [ "# creating embeddings using huggingface embedding model\n", "from langchain.embeddings import HuggingFaceEmbeddings" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "id": "uOcz_FvSFFOc" }, "outputs": [], "source": [ "%%capture\n", "pip install sentence-transformers" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 573, "referenced_widgets": [ 
"024e5f65bea84dc4a0d5ce8a27fd9395", "a67609461c9d48c492c88bf339fbfc01", "2dd818d0785140b48ff7b75ab0dad735", "19c8b0aba7024aecbdabea2542d55b79", "d09ce90cbeb64256959bc09d12383eb0", "def73f1a38a54d7890e3e9b8272545f8", "f213e17efedc4ddda2e9111aa3e912f1", "00e8c8dba2644b858ebe2a660d25b771", "cc0f73b62bb84a7596f0e57e3c72744c", "daf38282f5ff4f01b7d3d4aa6ec81fa1", "ba8fd584f99d42d48a440a084816e3c3", "f490c95df6e240fabbc923029a7bf1cd", "ef1f514de8b547148f08651f4f87e529", "43a56651796e43b095b960d9d0ad3a8c", "c25d6e0714ed40dbaf069c86af9a9748", "6807440eea554c8ba9d18695e89305ed", "7fac4b1aeb2c4ce88e5d5ce378d903e4", "ef836c87c24e4a58a001398eaa47be9f", "5266584459f64499b0340a685ccc7359", "0f9d76859b324d55a956b3d467d0ac2b", "914ef6899d9645ac8a71dee1deded9b4", "bbc82792280c46d6b200a5199a722332", "23315485180c429b87bce4bf62ee97e1", "0e7d6d4e727643358dc4a450953507b5", "01b651c8725d49bdab8f1edb98ad386a", "ceebfb4b5e1648bd9065cac2589b4908", "ac3a5d63ef3e46358bff6bcc02444e50", "2ee1daf895124affa8ff8d9b1fc42152", "3a976ded110741fcae11fa6641dfeb13", "8c289901f9354ecd91a63edb6c954c50", "75d650cb170e46fb85b4fdae5a318664", "b63d0f44e17a44229255ce808a70c9fe", "2f72eeca7c124901b61fa11a81af88fd", "3d585e4cd7b443f5aaaf0929841803d2", "3a9c49f2c47c415591d6b2756e747cb2", "a266da18ec974fc5b0d37b379431f96c", "a92c05d488d444b7ac4e84161032db80", "7a16ebefc031475f8f9ada21dbac3748", "d453e793707844ac834ff82a276dfdf4", "f76803c6c8934db5a2657a82d0e975f3", "e603d3314cc74d45bc2db841ca52a7b1", "b8899013cf684774888bcfd5e473ce39", "51f9d387cec041c9950d853e0f752096", "8ec1c76de6574a1d89d7b063af820a28", "a455cb0f609e4ec3bf8b334f96486969", "0ce51d8f8fdb4f3fa21a69cbacf23b13", "0516102ce67c47a6bbb669f8b6dd0737", "252cce6df76b487a975d6eaff499b728", "f1c80322dd4b4619b91252426ccfaa72", "c9f022cfcd9c4fa0a980f68280c70145", "6b5bbe06dc1141d694942f3fe6d56cb3", "243dda2e826c4db898caae7c4c7332c9", "e4152c53c73c44dd9352fd3a5d86e70f", "155077c337324c10ac1c7b9dbce54bb5", "ac57e64322ee48aabcf0c9ecf65394e8", 
"e1f47ba111a3432ea5374bfdcabf9665", "8724b6427ae242a098d507b8402058dd", "054b6c9e10b5438db13230bba7861a8e", "3c40000fe4a4456e8d580e6d650b1ef1", "2c4f2aacec4b47eeb3038df569bdb03a", "73b112025ca3424b935b3c2854c040d8", "df9b37b9916349c8947ccccff773227a", "ef1212cda26943a3a67437b18de8c103", "c0bc404ff7094998bfc972a3e94d60ce", "4203765e75eb4a8cba2a3d0f093c21e7", "2ab16698b0ba4dafa53416d0b7ddf148", "4c7a871fea9a4e56aa85fd956fd06fe6", "6848edcfc30d48d09e56eb0b565542ce", "aa7b1e04f14b4f94a215746c4fdf1809", "ac670ecb92d04a599405d68aa9e8a863", "f333382fb7c24383a56c4e03ec3edbb0", "d47eba7cb30f41bb9808b358b47aa480", "c3caa3efaf2947fa9bafe571cbb10212", "8edca52baf554bcc8f639dda3ce6ab7b", "1c28d90ebf4b4016bd065512f8e5a871", "e5ef93b9199c4f29a312c61042cfc725", "cbe6e4c1fd2f4026967b42c9252c2af6", "bd973f74a08c4a62964625eec55d66e4", "bba46396e97242fa8dc3549fa4c1cd46", "5e92bcdee6f340a399cb5c240b1962d0", "58e7ec99c5104a59ad1cbd470695953b", "660e0da0e03940949069140c3eb8bce5", "861e4d8b612f4ead8386651201bbe63f", "60579cbac5984ec5aef62ab80a148329", "a05a6b953dee46ef9803f082d0fcbb65", "ff6949908a184607b6b91e663d03b318", "027b48a0ed394661ba1fcd82d2da8294", "ce748b5e48a640cfa85d4b128dac6d7b", "229339a719104824a92ef4be9d3f3152", "d1fe4920ea244e359f9513ad5a9eb545", "f8a4e9b9b0464913be456134c54f310f", "ad4ec0cc6c5c4e9ba745db68175756aa", "6b2812bae90a4b1ab418402c4600f39b", "e9b747f2c458451e990d8a805713aa31", "88eac84cb2c7404987f3ea720589f55b", "01867efc1d4a4239adafe64954f96d20", "11f81f23a7dd481da40c681892ba2128", "53893d1681914e4ab51107f072d1a20a", "029576d58c644347a12daf7912304dce", "a884d741295b47a3a3ad7a61f8a71d65", "7e03ff076f5b43bd80423200c3ab5893", "21455ac70db0424d88aa56b39ce0f0a7", "38f2974ca5184dd6b0d712b29cf01fd9", "3cba0d24250a4b9186e7e4e5746788d9", "db1fd593c53d474798dabcc4a830a884", "d72a0e7a878b481681ffa6dd023c180f", "824bd98e2e2342b28499a93a631d0091", "6936b9b9a69b4fb9b9d61dce6fe0bd1f", "df9c281cf21a4abab595ac4c1d9aaea0", "56fc71f66afc426ea1a4e7a9d8387e40", 
"b6d9c4666a6c4a199c1a27e000d3710f", "1a687f53b527458193a4aaffda0c90e5", "1d707b6ed40f440994f145ea8a82435b", "34961bc9485947bb90c4203500a710a0", "2463cd9419ca4b758ef0f7783eed0376", "e4084b42931447709f873eddb41308df", "da0af39133f544d4b197a8935110ad8b", "d56061a5a80e46bf9d4a5e79cf22ed9c", "4fa8d686aa7044bba22729045b2d8858", "ea520c080f504c088c8df374fb79f394", "cf45219fef7845a7a6b129a3146d96db" ] }, "id": "IaBMu1JUFMhD", "outputId": "2c66487f-9e54-4961-8e56-c4c574878085" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/langchain_core/_api/deprecation.py:139: LangChainDeprecationWarning: The class `HuggingFaceEmbeddings` was deprecated in LangChain 0.2.2 and will be removed in 0.3.0. An updated version of the class exists in the langchain-huggingface package and should be used instead. To use it run `pip install -U langchain-huggingface` and import as `from langchain_huggingface import HuggingFaceEmbeddings`.\n", " warn_deprecated(\n", "/usr/local/lib/python3.10/dist-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. 
in jupyter console)\n", " from tqdm.autonotebook import tqdm, trange\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", "You will be able to reuse this secret in all of your notebooks.\n", "Please note that authentication is recommended but still optional to access public models or datasets.\n", " warnings.warn(\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "024e5f65bea84dc4a0d5ce8a27fd9395", "version_major": 2, "version_minor": 0 }, "text/plain": [ "modules.json: 0%| | 0.00/349 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f490c95df6e240fabbc923029a7bf1cd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config_sentence_transformers.json: 0%| | 0.00/116 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "23315485180c429b87bce4bf62ee97e1", "version_major": 2, "version_minor": 0 }, "text/plain": [ "README.md: 0%| | 0.00/10.7k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3d585e4cd7b443f5aaaf0929841803d2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "sentence_bert_config.json: 0%| | 0.00/53.0 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a455cb0f609e4ec3bf8b334f96486969", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/612 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { 
"application/vnd.jupyter.widget-view+json": { "model_id": "e1f47ba111a3432ea5374bfdcabf9665", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors: 0%| | 0.00/90.9M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4c7a871fea9a4e56aa85fd956fd06fe6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/350 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bd973f74a08c4a62964625eec55d66e4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "vocab.txt: 0%| | 0.00/232k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "229339a719104824a92ef4be9d3f3152", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.json: 0%| | 0.00/466k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a884d741295b47a3a3ad7a61f8a71d65", "version_major": 2, "version_minor": 0 }, "text/plain": [ "special_tokens_map.json: 0%| | 0.00/112 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b6d9c4666a6c4a199c1a27e000d3710f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "1_Pooling/config.json: 0%| | 0.00/190 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "embeddings=HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "id": "B3pGt8EvFaoc" }, "outputs": [], "source": [ "%%capture\n", "pip install faiss-cpu" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "id": "SKAX7hs-Fja7" }, "outputs": [], "source": [ "from 
langchain.vectorstores import FAISS" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "id": "fYPaTd4qFpJd" }, "outputs": [], "source": [ "vectordabase=FAISS.from_documents(chunks,embeddings)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "id": "JgTfXa9tHXZn" }, "outputs": [], "source": [ "question=\"what is generative ai?\"" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-JIHpbCAHjOT", "outputId": "c437e044-f4c4-4ac7-83fd-9ba8a1202f9c" }, "outputs": [ { "data": { "text/plain": [ "[Document(metadata={'source': 'pdfs/Generative AI.pdf', 'page': 1}, page_content='Generative AI \\nGenerative AI refers to a branch of artificial intelligence that focuses on creating \\nmodels and algorithms capable of generating new, original content, such as \\nimages, text, music, and even videos. Unlike traditional AI models that are trained'),\n", " Document(metadata={'source': 'pdfs/Generative AI.pdf', 'page': 78}, page_content='Generative AI \\nGenerative AI is a type of AI that can create new content including text, code, images and music. Generative AI models are trained \\non large datasets of existing content, learning to identify patterns in data and using those patterns to generate new content.'),\n", " Document(metadata={'source': 'pdfs/Generative AI.pdf', 'page': 75}, page_content='music or even generating code. Instead of relying on pre-programmed responses, \\ngenerative AI leverages its understanding of language to produce truly original \\noutputs, making it a powerful tool for creative exploration and innovation.'),\n", " Document(metadata={'source': 'pdfs/Generative AI.pdf', 'page': 79}, page_content='to innovate and truly be creative. But generative AI breaks this mold. 
Not only can generative AI models \\nanswer questions and follow commands, but they can also imagine and produce entirely new concepts \\n— giving way to exciting possibilities and advancements across fields and industries.')]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vectordabase.similarity_search(question)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "id": "yKv2Qc8GHrxE" }, "outputs": [], "source": [ "model=\"google/flan-t5-large\"" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "id": "EDv9-8zWHs9B" }, "outputs": [], "source": [ "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM,pipeline\n", "from langchain import HuggingFacePipeline" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 241, "referenced_widgets": [ "64564aec337d4c9598661e98b2b043ec", "4544e44ebe45439b87deb2e5d469c696", "9fe7c51ac9474f5d86da98539dff1a55", "2b4454dabf50499db09c80bef2f24caf", "ed82f701d5d648a6bc60d10510687032", "c94b5f61673244b8aab19df293f5de96", "92e4e0f4dd60477f94435d6f1a40568b", "283cbf3793024ede8e4504e74ed08c3d", "352553299d8448e9a294e0ae50226220", "8b0ab2e552104f3e999bce15acbcddff", "7a9bfa6b49274672a602412e3a00c086", "f4a38c0ebf8248649b4215cb6825378c", "a03ea366508f440baa65d4379a7c8d31", "7629fe8cd79e402e90818d7f826096a1", "ae660db7d48449b3844ce0f0b9b35b30", "9eebd7b5f7fb4f69a38c80829e1881b9", "ac5ba4fd5dd74a6bba545e195d00ddc6", "2e06d70451804cae8cb302dceb23e425", "94ef72978d7f4dfe8e73b45e1cd0a5a8", "d09daf1e10824c24a507d5317dfed540", "3e365be911cd4c869aa7845bbaa84ea2", "7b949ce6c5e74ce5adc36616b2491514", "7c1d0fd89a104e368951b3ba343bfac5", "5766ae6568f14170bcdbce39c15f626d", "97193c7f9fc1441891fc610ccdf19f52", "f3accfbbfc14478da378278f9161c48c", "fe9a43dbe6cf43338bbee760b764bb1d", "77b4d8cd33f741d581c40690f1c5a36b", "bbc49b8ba1204c3a9b41de22ca8f98a1", "8c162e8462684de8af27985f3d99d2fd", 
"d84bbe53477948e8a37897c343881188", "a5996e9c74f74d90be29b67efbcbce7a", "a69fe4a869dc444c968471514dedb9df", "05f2c6ed7848427f857d2d290ba46376", "32b1726cb9384b7eb25127f863f97022", "31d362d3bc43421a8f479081b02410bd", "e72babd076674e1d831238046e38df16", "407c9965bb80493286411e09cf1babe1", "00599ae26ce6496f800251746e6985bf", "71ccac03ebf94806ae81475a2fdf1afe", "dd4677c80b4a40a2ad46be58428fc174", "451ed91ddde448d5b06ba9419cc15a1e", "3e4a789f49cd414991b15b3bc3788e72", "1103945a3ee74f0c91b535925fc375a5", "dc5cef7f2a184fca8307c982247ac2ba", "349aef9f37b747cbad85553379ddf632", "6e3757cfec0b478db0068880c4a7c5b1", "727b01d958d0440086c38851ed1838b0", "6a2593724a7d4ae78e0dd7f392a20581", "c0e3937cca604d3d910b6333e91b4c55", "a05eb6af0e194f32b81366ba37bae05e", "ebe77886e86c4d8aaee28804820c0c57", "b236bfed929c4fb884c1d2ae4b8b6f43", "c7741648912b4803b09a6f9a38326470", "8b39b314a3624cbfa0bcac4139dc2e01", "a75371f14aaf443bbfde44c5a90d5ffb", "7565a537a6f8425b882333aa1614a49b", "fa94012f596843e4b8e3295ebb911ab2", "82bfd5c4f9364748b4733c8cbf988888", "aa1308f0059247178a53cd34742b332a", "62afd307f8f840be859f71bf83178d10", "2d52a563cf4641f3a7195b81a3521f5b", "ba7d4df3373040eaaf112b667c1052a3", "405d9f9b727e4eb4aee8436a097ea0b8", "2c59c709f6d842ab8737cab97e3be905", "2b1565dbafc7422984486094b4b3e700", "b893c477da23446f95ef64b1f775ce06", "ca2906a4a3ab419e8de48067581511ad", "4b865467d2fd4c1c89dbf667aaec6e5f", "87adbcb6c3e5437095950e3b9e26139f", "ed8a5b3f1e5d4380af511266bd97a31b", "22235393434e421d82941d22b6ab9dc5", "82b6f0492c744cc99b3c5f26b889ed47", "79ccfd95bdf843b3b9b88ca3e2ae4b76", "24fade4154084a0e831e025d40c58a77", "e5d37de7793d4aa697f4dcbe87917452", "56e8d79f8eae4a7d8bdc87db4e9e2761" ] }, "id": "Py3SsTuSHwEL", "outputId": "5b69379a-1905-45cb-f52d-2f0dd9d7d72c" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "64564aec337d4c9598661e98b2b043ec", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/2.54k [00:00, 
?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f4a38c0ebf8248649b4215cb6825378c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "spiece.model: 0%| | 0.00/792k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7c1d0fd89a104e368951b3ba343bfac5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.json: 0%| | 0.00/2.42M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "05f2c6ed7848427f857d2d290ba46376", "version_major": 2, "version_minor": 0 }, "text/plain": [ "special_tokens_map.json: 0%| | 0.00/2.20k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "dc5cef7f2a184fca8307c982247ac2ba", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/662 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a75371f14aaf443bbfde44c5a90d5ffb", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors: 0%| | 0.00/3.13G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b893c477da23446f95ef64b1f775ce06", "version_major": 2, "version_minor": 0 }, "text/plain": [ "generation_config.json: 0%| | 0.00/147 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tokenizer=AutoTokenizer.from_pretrained(model) # tokenization(token)\n", "model1=AutoModelForSeq2SeqLM.from_pretrained(model) # (generation)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "id": "ZOo35nBXIGxO" }, "outputs": [], "source": [ 
"# generation settings must live on the pipeline itself; max_new_tokens replaces the\n", "# model-agnostic default max_length=20 that truncated the answers recorded below\n", "pipe=pipeline(\"text2text-generation\",model=model1,tokenizer=tokenizer,max_new_tokens=512)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "KwAgKsr3IKj7", "outputId": "1ab9ce06-97a6-4c2b-9022-5acb11321725" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/langchain_core/_api/deprecation.py:139: LangChainDeprecationWarning: The class `HuggingFacePipeline` was deprecated in LangChain 0.0.37 and will be removed in 0.3. An updated version of the class exists in the langchain-huggingface package and should be used instead. To use it run `pip install -U langchain-huggingface` and import as `from langchain_huggingface import HuggingFacePipeline`.\n", " warn_deprecated(\n" ] } ], "source": [ "# model_kwargs is ignored when a prebuilt pipeline object is passed in (hence the\n", "# max_length=20 warnings in the recorded outputs), so it is dropped; decoding is\n", "# controlled by max_new_tokens set on the pipeline above\n", "llm=HuggingFacePipeline(pipeline=pipe)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "id": "gkkwGQwMIOtu" }, "outputs": [], "source": [ "# retrieval qa takes prompt template and llm\n", "from langchain.prompts import PromptTemplate\n", "\n", "template = \"\"\"Use the context to provide a concise answer. If you don't know the answer, just say that you don't know.\n", "{context}\n", "Question: {question}\n", "Helpful Answer:\"\"\"\n", "QA_CHAIN_PROMPT = PromptTemplate.from_template(template)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "id": "qwwR9JCvIT7W" }, "outputs": [], "source": [ "from langchain.chains import RetrievalQA\n", "# default \"stuff\" chain: retrieved chunks are inserted into the prompt's {context}\n", "qa_chain = RetrievalQA.from_chain_type(\n", " llm, retriever=vectordabase.as_retriever(), chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT}\n", ")" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 93 }, "id": "k0PnQ8uzIXvc", "outputId": "378ebb79-c51b-4d1b-dfc0-42dc434a1190" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ 
"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1249: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n" ] }, { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'a branch of artificial intelligence that focuses on creating models and algorithms capable of generating new'" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "qa_chain.run(\"what is generative ai?\")" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "n0jidIhKIrA8", "outputId": "cd223e1b-407c-4146-b263-f65348ae7981" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/langchain_core/_api/deprecation.py:139: LangChainDeprecationWarning: The method `Chain.__call__` was deprecated in langchain 0.1.0 and will be removed in 0.3.0. Use invoke instead.\n", " warn_deprecated(\n", "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1249: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. 
We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Generative AI models are trained on large datasets of existing content, learning to identify patterns\n" ] } ], "source": [ "question=\"more detail about generative ai?\"\n", "# invoke() replaces the deprecated Chain.__call__ (removed in langchain 0.3.0,\n", "# per the deprecation warning recorded above)\n", "result = qa_chain.invoke({\"query\": question})\n", "print(result[\"result\"])" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "id": "hVD32vnSJMSn" }, "outputs": [], "source": [ "def fetch(question,history):\n", " \"\"\"Gradio chat handler: answer `question` with the RetrievalQA chain (history is unused).\"\"\"\n", " result=qa_chain.invoke({\"query\":question})\n", " return result[\"result\"]" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "id": "wEfl85FeJQ9e" }, "outputs": [], "source": [ "%%capture\n", "%pip install gradio" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "id": "JWQU618uJZ-E" }, "outputs": [], "source": [ "import gradio as gr" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 800 }, "id": "nUakx9eEJget", "outputId": "a92a28b7-98ec-4322-9e5d-83def36f0256" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Caching examples at: '/content/gradio_cached_examples/15'\n", "Caching example 1/3\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1249: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. 
We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Caching example 3/3\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1249: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n", "Running on public URL: https://2e069e3f5ca0f0f8da.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "