ashwincv0112 committed on
Commit
72b7a20
·
1 Parent(s): e87e0f8

LangChain QuestionMyDoc ChatBot

Browse files
Files changed (5) hide show
  1. QuestionMyDoc_Manual_Version.ipynb +292 -0
  2. README.md +5 -5
  3. app.py +36 -0
  4. guide1.txt +0 -0
  5. requirements.txt +4 -0
QuestionMyDoc_Manual_Version.ipynb ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 23,
20
+ "metadata": {
21
+ "id": "76BpiP5vMhpG"
22
+ },
23
+ "outputs": [],
24
+ "source": [
25
+ "# !pip install openai langchain python-dotenv -q"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "source": [
31
+ "# !pip install chromadb==0.3.22 tiktoken -q"
32
+ ],
33
+ "metadata": {
34
+ "id": "ASD5ljxgNNbs"
35
+ },
36
+ "execution_count": 24,
37
+ "outputs": []
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "source": [
42
+ "# !pip install chromadb -U"
43
+ ],
44
+ "metadata": {
45
+ "id": "8IWdv5UgNP6c"
46
+ },
47
+ "execution_count": 25,
48
+ "outputs": []
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "source": [
53
+ "# !pip install gradio"
54
+ ],
55
+ "metadata": {
56
+ "id": "DliXsYaZOtAH"
57
+ },
58
+ "execution_count": 26,
59
+ "outputs": []
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "source": [
64
+ "from langchain.embeddings.openai import OpenAIEmbeddings\n",
65
+ "from langchain.vectorstores import Chroma\n",
66
+ "from langchain.text_splitter import CharacterTextSplitter\n",
67
+ "from langchain.chains.question_answering import load_qa_chain\n",
68
+ "from langchain.llms import OpenAI\n",
69
+ "import os\n"
70
+ ],
71
+ "metadata": {
72
+ "id": "jGEXeboZNAb9"
73
+ },
74
+ "execution_count": 27,
75
+ "outputs": []
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "source": [
80
+ "with open(\"/content/Data_Engineering.txt\") as f:\n",
81
+ " hitchhikersguide = f.read()"
82
+ ],
83
+ "metadata": {
84
+ "id": "h4QnGIJYNjeM"
85
+ },
86
+ "execution_count": 28,
87
+ "outputs": []
88
+ },
89
+ {
90
+ "cell_type": "code",
91
+ "source": [
92
+ "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0, separator = \"\\n\")\n",
93
+ "texts = text_splitter.split_text(hitchhikersguide)\n",
94
+ "print(f\"Final lenght: {len(texts)}\")"
95
+ ],
96
+ "metadata": {
97
+ "colab": {
98
+ "base_uri": "https://localhost:8080/"
99
+ },
100
+ "id": "RmfWIfclN4DP",
101
+ "outputId": "58e3ffcf-b56a-4120-bcd9-718396bfa49c"
102
+ },
103
+ "execution_count": 29,
104
+ "outputs": [
105
+ {
106
+ "output_type": "stream",
107
+ "name": "stdout",
108
+ "text": [
109
+ "Final lenght: 1\n"
110
+ ]
111
+ }
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "source": [
117
+ "### Setting up the OpenAI env\n",
118
+ "\n",
119
+ "!echo OPENAI_API_KEY=\"\" > .env"
120
+ ],
121
+ "metadata": {
122
+ "id": "4Y4-ZTsZONsZ"
123
+ },
124
+ "execution_count": 30,
125
+ "outputs": []
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "source": [
130
+ "import os\n",
131
+ "import openai\n",
132
+ "from dotenv import load_dotenv\n",
133
+ "\n",
134
+ "load_dotenv(\".env\")\n",
135
+ "\n",
136
+ "openai.api_key = os.environ.get(\"OPENAI_API_KEY\")"
137
+ ],
138
+ "metadata": {
139
+ "id": "PPYw5waOOT0D"
140
+ },
141
+ "execution_count": 31,
142
+ "outputs": []
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "source": [
147
+ "embeddings = OpenAIEmbeddings()"
148
+ ],
149
+ "metadata": {
150
+ "id": "pj-lRr3UODGm"
151
+ },
152
+ "execution_count": 32,
153
+ "outputs": []
154
+ },
155
+ {
156
+ "cell_type": "code",
157
+ "source": [
158
+ "docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{\"source\": str(i)} for i in range(len(texts))]).as_retriever()"
159
+ ],
160
+ "metadata": {
161
+ "id": "DcDeDj9HOFgI"
162
+ },
163
+ "execution_count": 33,
164
+ "outputs": []
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "source": [
169
+ "# Creating the Chain Model\n",
170
+ "chain = load_qa_chain(OpenAI(temperature=0), chain_type=\"stuff\")"
171
+ ],
172
+ "metadata": {
173
+ "id": "7Sh5PEFoOcF9"
174
+ },
175
+ "execution_count": 34,
176
+ "outputs": []
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "source": [
181
+ "def make_inference(query):\n",
182
+ " docs = docsearch.get_relevant_documents(query)\n",
183
+ " return(chain.run(input_documents=docs, question=query))"
184
+ ],
185
+ "metadata": {
186
+ "id": "meb-lvSsOgsM"
187
+ },
188
+ "execution_count": 35,
189
+ "outputs": []
190
+ },
191
+ {
192
+ "cell_type": "code",
193
+ "source": [
194
+ "import gradio\n",
195
+ "\n",
196
+ "if __name__ == \"__main__\":\n",
197
+ " # make a gradio interface\n",
198
+ " import gradio as gr\n",
199
+ "\n",
200
+ " gr.Interface(\n",
201
+ " make_inference,\n",
202
+ " [\n",
203
+ " gr.inputs.Textbox(lines=2, label=\"Query\"),\n",
204
+ " ],\n",
205
+ " gr.outputs.Textbox(label=\"Response\"),\n",
206
+ " title=\"🗣️TalkToMyDoc📄\",\n",
207
+ " description=\"🗣️TalkToMyDoc📄 is a tool that allows you to ask questions about a document. In this case - Hitch Hitchhiker's Guide to the Galaxy.\",\n",
208
+ " ).launch()"
209
+ ],
210
+ "metadata": {
211
+ "colab": {
212
+ "base_uri": "https://localhost:8080/",
213
+ "height": 781
214
+ },
215
+ "id": "-btP40G1OkgI",
216
+ "outputId": "062d6b92-d8c2-4256-deef-023bb9b0292a"
217
+ },
218
+ "execution_count": 36,
219
+ "outputs": [
220
+ {
221
+ "output_type": "stream",
222
+ "name": "stderr",
223
+ "text": [
224
+ "<ipython-input-36-636b02531079>:10: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components\n",
225
+ " gr.inputs.Textbox(lines=2, label=\"Query\"),\n",
226
+ "<ipython-input-36-636b02531079>:10: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect\n",
227
+ " gr.inputs.Textbox(lines=2, label=\"Query\"),\n",
228
+ "<ipython-input-36-636b02531079>:10: GradioDeprecationWarning: `numeric` parameter is deprecated, and it has no effect\n",
229
+ " gr.inputs.Textbox(lines=2, label=\"Query\"),\n",
230
+ "<ipython-input-36-636b02531079>:12: GradioDeprecationWarning: Usage of gradio.outputs is deprecated, and will not be supported in the future, please import your components from gradio.components\n",
231
+ " gr.outputs.Textbox(label=\"Response\"),\n"
232
+ ]
233
+ },
234
+ {
235
+ "output_type": "stream",
236
+ "name": "stdout",
237
+ "text": [
238
+ "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
239
+ "Note: opening Chrome Inspector may crash demo inside Colab notebooks.\n",
240
+ "\n",
241
+ "To create a public link, set `share=True` in `launch()`.\n"
242
+ ]
243
+ },
244
+ {
245
+ "output_type": "display_data",
246
+ "data": {
247
+ "text/plain": [
248
+ "<IPython.core.display.Javascript object>"
249
+ ],
250
+ "application/javascript": [
251
+ "(async (port, path, width, height, cache, element) => {\n",
252
+ " if (!google.colab.kernel.accessAllowed && !cache) {\n",
253
+ " return;\n",
254
+ " }\n",
255
+ " element.appendChild(document.createTextNode(''));\n",
256
+ " const url = await google.colab.kernel.proxyPort(port, {cache});\n",
257
+ "\n",
258
+ " const external_link = document.createElement('div');\n",
259
+ " external_link.innerHTML = `\n",
260
+ " <div style=\"font-family: monospace; margin-bottom: 0.5rem\">\n",
261
+ " Running on <a href=${new URL(path, url).toString()} target=\"_blank\">\n",
262
+ " https://localhost:${port}${path}\n",
263
+ " </a>\n",
264
+ " </div>\n",
265
+ " `;\n",
266
+ " element.appendChild(external_link);\n",
267
+ "\n",
268
+ " const iframe = document.createElement('iframe');\n",
269
+ " iframe.src = new URL(path, url).toString();\n",
270
+ " iframe.height = height;\n",
271
+ " iframe.allow = \"autoplay; camera; microphone; clipboard-read; clipboard-write;\"\n",
272
+ " iframe.width = width;\n",
273
+ " iframe.style.border = 0;\n",
274
+ " element.appendChild(iframe);\n",
275
+ " })(7861, \"/\", \"100%\", 500, false, window.element)"
276
+ ]
277
+ },
278
+ "metadata": {}
279
+ }
280
+ ]
281
+ },
282
+ {
283
+ "cell_type": "code",
284
+ "source": [],
285
+ "metadata": {
286
+ "id": "fqFPXldYOm0X"
287
+ },
288
+ "execution_count": 36,
289
+ "outputs": []
290
+ }
291
+ ]
292
+ }
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: LangChain Testing Version2
3
- emoji: 📉
4
- colorFrom: green
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 3.41.2
8
  app_file: app.py
9
  pinned: false
10
  license: openrail
 
1
  ---
2
+ title: TalkToMyDoc Hitch Hikers Guide
3
+ emoji: 🐠
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 3.27.0
8
  app_file: app.py
9
  pinned: false
10
  license: openrail
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.embeddings.openai import OpenAIEmbeddings
2
+ from langchain.vectorstores import Chroma
3
+ from langchain.text_splitter import CharacterTextSplitter
4
+ from langchain.chains.question_answering import load_qa_chain
5
+ from langchain.llms import OpenAI
6
+ import os
7
+
8
+ with open("guide1.txt") as f:
9
+ hitchhikersguide = f.read()
10
+
11
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0, separator = "\n")
12
+ texts = text_splitter.split_text(hitchhikersguide)
13
+
14
+ embeddings = OpenAIEmbeddings()
15
+
16
+ docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()
17
+
18
+ chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
19
+
20
+ def make_inference(query):
21
+ docs = docsearch.get_relevant_documents(query)
22
+ return(chain.run(input_documents=docs, question=query))
23
+
24
+ if __name__ == "__main__":
25
+ # make a gradio interface
26
+ import gradio as gr
27
+
28
+ gr.Interface(
29
+ make_inference,
30
+ [
31
+ gr.inputs.Textbox(lines=2, label="Query"),
32
+ ],
33
+ gr.outputs.Textbox(label="Response"),
34
+ title="🗣️TalkToMyDoc📄",
35
+ description="🗣️TalkToMyDoc📄 is a tool that allows you to ask questions about a document. In this case - Hitch Hitchhiker's Guide to the Galaxy.",
36
+ ).launch()
guide1.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ langchain
2
+ openai
3
+ tiktoken
4
+ chromadb