Spaces:

ashwincv0112
/

LangChain_Testing_Version2

Runtime error

App Files Files Community

ashwincv0112 committed on Aug 30, 2023

Commit

72b7a20

1 Parent(s): e87e0f8

langChain QuestionMyDoc ChatBot

Browse files

Files changed (5) hide show

QuestionMyDoc_Manual_Version.ipynb +292 -0
README.md +5 -5
app.py +36 -0
guide1.txt +0 -0
requirements.txt +4 -0

QuestionMyDoc_Manual_Version.ipynb ADDED Viewed

	@@ -0,0 +1,292 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": 23,
+      "metadata": {
+        "id": "76BpiP5vMhpG"
+      },
+      "outputs": [],
+      "source": [
+        "# !pip install openai langchain python-dotenv -q"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# !pip install chromadb==0.3.22 tiktoken -q"
+      ],
+      "metadata": {
+        "id": "ASD5ljxgNNbs"
+      },
+      "execution_count": 24,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# !pip install chromadb -U"
+      ],
+      "metadata": {
+        "id": "8IWdv5UgNP6c"
+      },
+      "execution_count": 25,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# !pip install gradio"
+      ],
+      "metadata": {
+        "id": "DliXsYaZOtAH"
+      },
+      "execution_count": 26,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from langchain.embeddings.openai import OpenAIEmbeddings\n",
+        "from langchain.vectorstores import Chroma\n",
+        "from langchain.text_splitter import CharacterTextSplitter\n",
+        "from langchain.chains.question_answering import load_qa_chain\n",
+        "from langchain.llms import OpenAI\n",
+        "import os\n"
+      ],
+      "metadata": {
+        "id": "jGEXeboZNAb9"
+      },
+      "execution_count": 27,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "with open(\"/content/Data_Engineering.txt\") as f:\n",
+        "    hitchhikersguide = f.read()"
+      ],
+      "metadata": {
+        "id": "h4QnGIJYNjeM"
+      },
+      "execution_count": 28,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0, separator = \"\\n\")\n",
+        "texts = text_splitter.split_text(hitchhikersguide)\n",
+        "print(f\"Final lenght: {len(texts)}\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "RmfWIfclN4DP",
+        "outputId": "58e3ffcf-b56a-4120-bcd9-718396bfa49c"
+      },
+      "execution_count": 29,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Final lenght: 1\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "### Setting up the OpenAI env\n",
+        "\n",
+        "!echo OPENAI_API_KEY=\"\" > .env"
+      ],
+      "metadata": {
+        "id": "4Y4-ZTsZONsZ"
+      },
+      "execution_count": 30,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "import openai\n",
+        "from dotenv import load_dotenv\n",
+        "\n",
+        "load_dotenv(\".env\")\n",
+        "\n",
+        "openai.api_key = os.environ.get(\"OPENAI_API_KEY\")"
+      ],
+      "metadata": {
+        "id": "PPYw5waOOT0D"
+      },
+      "execution_count": 31,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "embeddings = OpenAIEmbeddings()"
+      ],
+      "metadata": {
+        "id": "pj-lRr3UODGm"
+      },
+      "execution_count": 32,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{\"source\": str(i)} for i in range(len(texts))]).as_retriever()"
+      ],
+      "metadata": {
+        "id": "DcDeDj9HOFgI"
+      },
+      "execution_count": 33,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Creating the Chain Model\n",
+        "chain = load_qa_chain(OpenAI(temperature=0), chain_type=\"stuff\")"
+      ],
+      "metadata": {
+        "id": "7Sh5PEFoOcF9"
+      },
+      "execution_count": 34,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def make_inference(query):\n",
+        "    docs = docsearch.get_relevant_documents(query)\n",
+        "    return(chain.run(input_documents=docs, question=query))"
+      ],
+      "metadata": {
+        "id": "meb-lvSsOgsM"
+      },
+      "execution_count": 35,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import gradio\n",
+        "\n",
+        "if __name__ == \"__main__\":\n",
+        "    # make a gradio interface\n",
+        "    import gradio as gr\n",
+        "\n",
+        "    gr.Interface(\n",
+        "        make_inference,\n",
+        "        [\n",
+        "            gr.inputs.Textbox(lines=2, label=\"Query\"),\n",
+        "        ],\n",
+        "        gr.outputs.Textbox(label=\"Response\"),\n",
+        "        title=\"🗣️TalkToMyDoc📄\",\n",
+        "        description=\"🗣️TalkToMyDoc📄 is a tool that allows you to ask questions about a document. In this case - Hitch Hitchhiker's Guide to the Galaxy.\",\n",
+        "    ).launch()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 781
+        },
+        "id": "-btP40G1OkgI",
+        "outputId": "062d6b92-d8c2-4256-deef-023bb9b0292a"
+      },
+      "execution_count": 36,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-36-636b02531079>:10: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components\n",
+            "  gr.inputs.Textbox(lines=2, label=\"Query\"),\n",
+            "<ipython-input-36-636b02531079>:10: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect\n",
+            "  gr.inputs.Textbox(lines=2, label=\"Query\"),\n",
+            "<ipython-input-36-636b02531079>:10: GradioDeprecationWarning: `numeric` parameter is deprecated, and it has no effect\n",
+            "  gr.inputs.Textbox(lines=2, label=\"Query\"),\n",
+            "<ipython-input-36-636b02531079>:12: GradioDeprecationWarning: Usage of gradio.outputs is deprecated, and will not be supported in the future, please import your components from gradio.components\n",
+            "  gr.outputs.Textbox(label=\"Response\"),\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
+            "Note: opening Chrome Inspector may crash demo inside Colab notebooks.\n",
+            "\n",
+            "To create a public link, set `share=True` in `launch()`.\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.Javascript object>"
+            ],
+            "application/javascript": [
+              "(async (port, path, width, height, cache, element) => {\n",
+              "                        if (!google.colab.kernel.accessAllowed && !cache) {\n",
+              "                            return;\n",
+              "                        }\n",
+              "                        element.appendChild(document.createTextNode(''));\n",
+              "                        const url = await google.colab.kernel.proxyPort(port, {cache});\n",
+              "\n",
+              "                        const external_link = document.createElement('div');\n",
+              "                        external_link.innerHTML = `\n",
+              "                            <div style=\"font-family: monospace; margin-bottom: 0.5rem\">\n",
+              "                                Running on <a href=${new URL(path, url).toString()} target=\"_blank\">\n",
+              "                                    https://localhost:${port}${path}\n",
+              "                                </a>\n",
+              "                            </div>\n",
+              "                        `;\n",
+              "                        element.appendChild(external_link);\n",
+              "\n",
+              "                        const iframe = document.createElement('iframe');\n",
+              "                        iframe.src = new URL(path, url).toString();\n",
+              "                        iframe.height = height;\n",
+              "                        iframe.allow = \"autoplay; camera; microphone; clipboard-read; clipboard-write;\"\n",
+              "                        iframe.width = width;\n",
+              "                        iframe.style.border = 0;\n",
+              "                        element.appendChild(iframe);\n",
+              "                    })(7861, \"/\", \"100%\", 500, false, window.element)"
+            ]
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "fqFPXldYOm0X"
+      },
+      "execution_count": 36,
+      "outputs": []
+    }
+  ]
+}

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-title: LangChain Testing Version2
-emoji: 📉
-colorFrom: green
-colorTo: yellow
 sdk: gradio
-sdk_version: 3.41.2
 app_file: app.py
 pinned: false
 license: openrail

 ---
+title: TalkToMyDoc Hitch Hikers Guide
+emoji: 🐠
+colorFrom: blue
+colorTo: green
 sdk: gradio
+sdk_version: 3.27.0
 app_file: app.py
 pinned: false
 license: openrail

app.py ADDED Viewed

	@@ -0,0 +1,36 @@

+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.chains.question_answering import load_qa_chain
+from langchain.llms import OpenAI
+import os
+with open("guide1.txt") as f:
+    hitchhikersguide = f.read()
+text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0, separator = "\n")
+texts = text_splitter.split_text(hitchhikersguide)
+embeddings = OpenAIEmbeddings()
+docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()
+chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
+def make_inference(query):
+    docs = docsearch.get_relevant_documents(query)
+    return(chain.run(input_documents=docs, question=query))
+if __name__ == "__main__":
+    # make a gradio interface
+    import gradio as gr
+    gr.Interface(
+        make_inference,
+        [
+            gr.inputs.Textbox(lines=2, label="Query"),
+        ],
+        gr.outputs.Textbox(label="Response"),
+        title="🗣️TalkToMyDoc📄",
+        description="🗣️TalkToMyDoc📄 is a tool that allows you to ask questions about a document. In this case - Hitch Hitchhiker's Guide to the Galaxy.",
+    ).launch()

guide1.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+langchain
+openai
+tiktoken
+chromadb