{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# 🗣️TalkToMyDoc📄\n",
        "\n",
        "Question answering over a single text document: the document is split into chunks, embedded with OpenAI embeddings, indexed in Chroma, and queried through a LangChain `stuff` question-answering chain exposed in a Gradio UI."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "76BpiP5vMhpG"
      },
      "outputs": [],
      "source": [
        "# Uncomment on a fresh runtime to install the dependencies.\n",
        "# %pip install -q openai langchain python-dotenv\n",
        "# %pip install -q chromadb==0.3.22 tiktoken\n",
        "# %pip install -U chromadb\n",
        "# %pip install gradio"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "jGEXeboZNAb9"
      },
      "outputs": [],
      "source": [
        "import getpass\n",
        "import os\n",
        "from pathlib import Path\n",
        "\n",
        "import gradio as gr\n",
        "import openai\n",
        "from dotenv import load_dotenv\n",
        "from langchain.chains.question_answering import load_qa_chain\n",
        "from langchain.embeddings.openai import OpenAIEmbeddings\n",
        "from langchain.llms import OpenAI\n",
        "from langchain.text_splitter import CharacterTextSplitter\n",
        "from langchain.vectorstores import Chroma"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Settings a reader is likely to tune.\n",
        "DOC_PATH = Path(\"/content/Data_Engineering.txt\")  # document to index\n",
        "ENV_FILE = \".env\"  # optional file holding OPENAI_API_KEY=...\n",
        "CHUNK_SIZE = 1000  # chunk_size passed to the text splitter\n",
        "MAX_RETRIEVED_CHUNKS = 4  # upper bound on chunks handed to the QA chain per query"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Load and split the document"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "h4QnGIJYNjeM"
      },
      "outputs": [],
      "source": [
        "# Explicit encoding so the result does not depend on the platform default.\n",
        "document_text = DOC_PATH.read_text(encoding=\"utf-8\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "RmfWIfclN4DP"
      },
      "outputs": [],
      "source": [
        "text_splitter = CharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=0, separator=\"\\n\")\n",
        "texts = text_splitter.split_text(document_text)\n",
        "\n",
        "# Fail early with a clear message instead of an obscure error when indexing nothing.\n",
        "assert texts, f\"{DOC_PATH} produced no text chunks; is the file empty?\"\n",
        "print(f\"Final length: {len(texts)}\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## OpenAI credentials\n",
        "\n",
        "The API key is read from the environment or from an existing `.env` file. It is never written into the notebook."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "PPYw5waOOT0D"
      },
      "outputs": [],
      "source": [
        "# Load variables from ENV_FILE if it exists.\n",
        "load_dotenv(ENV_FILE)\n",
        "\n",
        "# Fall back to a hidden prompt so the key never appears in a cell or its output.\n",
        "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
        "    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API key: \")\n",
        "\n",
        "openai.api_key = os.environ[\"OPENAI_API_KEY\"]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Build the index and the QA chain"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "pj-lRr3UODGm"
      },
      "outputs": [],
      "source": [
        "embeddings = OpenAIEmbeddings()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "DcDeDj9HOFgI"
      },
      "outputs": [],
      "source": [
        "# Each chunk is tagged with its position in `texts` as its \"source\".\n",
        "vectorstore = Chroma.from_texts(\n",
        "    texts,\n",
        "    embeddings,\n",
        "    metadatas=[{\"source\": str(i)} for i in range(len(texts))],\n",
        ")\n",
        "\n",
        "# Never request more results than there are indexed chunks: some Chroma\n",
        "# versions raise when asked for more results than the index holds.\n",
        "docsearch = vectorstore.as_retriever(\n",
        "    search_kwargs={\"k\": min(MAX_RETRIEVED_CHUNKS, len(texts))}\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "7Sh5PEFoOcF9"
      },
      "outputs": [],
      "source": [
        "# Creating the Chain Model\n",
        "chain = load_qa_chain(OpenAI(temperature=0), chain_type=\"stuff\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "meb-lvSsOgsM"
      },
      "outputs": [],
      "source": [
        "def make_inference(query: str) -> str:\n",
        "    \"\"\"Answer ``query`` using the indexed document.\n",
        "\n",
        "    Fetches the chunks relevant to ``query`` from ``docsearch`` and runs the\n",
        "    QA ``chain`` on them with ``query`` as the question.\n",
        "\n",
        "    Args:\n",
        "        query: The question typed by the user.\n",
        "\n",
        "    Returns:\n",
        "        The value returned by ``chain.run`` for the retrieved chunks.\n",
        "    \"\"\"\n",
        "    docs = docsearch.get_relevant_documents(query)\n",
        "    return chain.run(input_documents=docs, question=query)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Launch the UI"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "-btP40G1OkgI"
      },
      "outputs": [],
      "source": [
        "if __name__ == \"__main__\":\n",
        "    # gr.Textbox replaces the deprecated gr.inputs.Textbox / gr.outputs.Textbox.\n",
        "    demo = gr.Interface(\n",
        "        fn=make_inference,\n",
        "        inputs=[\n",
        "            gr.Textbox(lines=2, label=\"Query\"),\n",
        "        ],\n",
        "        outputs=gr.Textbox(label=\"Response\"),\n",
        "        title=\"🗣️TalkToMyDoc📄\",\n",
        "        # Name the file that was actually indexed rather than a fixed title.\n",
        "        description=(\n",
        "            \"🗣️TalkToMyDoc📄 is a tool that allows you to ask questions about a document. \"\n",
        "            f\"In this case - {DOC_PATH.name}.\"\n",
        "        ),\n",
        "    )\n",
        "    demo.launch()"
      ]
    }
  ]
}