herMaster committed on
Commit
1a8b103
•
1 Parent(s): f005fdc

Using complete local code and loading the LLM through ctransformers.
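
For reference, the loading pattern this commit switches to looks roughly like the sketch below. It uses the same repo, model file, and parameters that appear in the diff; the example prompt and the final print are illustrative only, not part of the commit.

    from ctransformers import AutoModelForCausalLM

    # Downloads the GGUF weights from the Hub on first use, then runs fully locally on CPU.
    llm = AutoModelForCausalLM.from_pretrained(
        "TheBloke/Llama-2-7B-Chat-GGUF",
        model_file="llama-2-7b-chat.Q3_K_S.gguf",
        model_type="llama",
        temperature=0.2,
    )

    # A ctransformers model object is directly callable on a prompt string.
    print(llm("[INST] Hello! [/INST]"))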

Files changed (1)
  1. app.py +201 -16
app.py CHANGED
@@ -1,23 +1,200 @@
+ # import gradio as gr
+ # from qdrant_client import models, QdrantClient
+ # from sentence_transformers import SentenceTransformer
+ # from PyPDF2 import PdfReader
+ # from langchain.text_splitter import RecursiveCharacterTextSplitter
+ # from langchain.callbacks.manager import CallbackManager
+ # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+ # # from langchain.llms import LlamaCpp
+ # from langchain.vectorstores import Qdrant
+ # from qdrant_client.http import models
+ # # from langchain.llms import CTransformers
+ # from ctransformers import AutoModelForCausalLM
+
+
+
+
+
+ # # loading the embedding model -
+
+ # encoder = SentenceTransformer('jinaai/jina-embedding-b-en-v1')
+
+ # print("embedding model loaded.............................")
+ # print("####################################################")
+
+ # # loading the LLM
+
+ # callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+ # print("loading the LLM......................................")
+
+ # # llm = LlamaCpp(
+ # #     model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
+ # #     n_ctx=2048,
+ # #     f16_kv=True,  # MUST set to True, otherwise you will run into problems after a couple of calls
+ # #     callback_manager=callback_manager,
+ # #     verbose=True,
+ # # )
+
+ # llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
+ #     model_file="llama-2-7b-chat.Q8_0.gguf",
+ #     model_type="llama",
+ #     # config = ctransformers.hub.AutoConfig,
+ #     # hf = True
+ #     temperature = 0.2,
+ #     max_new_tokens = 1024,
+ #     stop = ['\n']
+ # )
+
+
+
+ # print("LLM loaded........................................")
+ # print("################################################################")
+
+ # def get_chunks(text):
+ #     text_splitter = RecursiveCharacterTextSplitter(
+ #         # separator = "\n",
+ #         chunk_size = 500,
+ #         chunk_overlap = 100,
+ #         length_function = len,
+ #     )
+
+ #     chunks = text_splitter.split_text(text)
+ #     return chunks
+
+
+ # pdf_path = './100 Weird Facts About the Human Body.pdf'
+
+
+ # reader = PdfReader(pdf_path)
+ # text = ""
+ # num_of_pages = len(reader.pages)
+ # for page in range(num_of_pages):
+ #     current_page = reader.pages[page]
+ #     text += current_page.extract_text()
+
+
+ # chunks = get_chunks(text)
+
+ # print("Chunks are ready.....................................")
+ # print("######################################################")
+
+ # qdrant = QdrantClient(path = "./db")
+ # print("db created................................................")
+ # print("#####################################################################")
+
+ # qdrant.recreate_collection(
+ #     collection_name="my_facts",
+ #     vectors_config=models.VectorParams(
+ #         size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by used model
+ #         distance=models.Distance.COSINE,
+ #     ),
+ # )
+
+ # print("Collection created........................................")
+ # print("#########################################################")
+
+
+
+ # li = []
+ # for i in range(len(chunks)):
+ #     li.append(i)
+
+ # dic = zip(li, chunks)
+ # dic = dict(dic)
+
+ # qdrant.upload_records(
+ #     collection_name="my_facts",
+ #     records=[
+ #         models.Record(
+ #             id=idx,
+ #             vector=encoder.encode(dic[idx]).tolist(),
+ #             payload= {dic[idx][:5] : dic[idx]}
+ #         ) for idx in dic.keys()
+ #     ],
+ # )
+
+ # print("Records uploaded........................................")
+ # print("###########################################################")
+
+ # def chat(question):
+ #     # question = input("ask question from pdf.....")
+
+
+ #     hits = qdrant.search(
+ #         collection_name="my_facts",
+ #         query_vector=encoder.encode(question).tolist(),
+ #         limit=3
+ #     )
+ #     context = []
+ #     for hit in hits:
+ #         context.append(list(hit.payload.values())[0])
+
+ #     context = context[0] + context[1] + context[2]
+
+ #     system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
+ #     Read the given context before answering questions and think step by step. If you can not answer a user question based on
+ #     the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""
+
+
+ #     B_INST, E_INST = "[INST]", "[/INST]"
+
+ #     B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
+
+ #     SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS
+
+ #     instruction = f"""
+ #     Context: {context}
+ #     User: {question}"""
+
+ #     prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
+
+ #     result = llm(prompt_template)
+ #     return result
+
+ # gr.Interface(
+ #     fn = chat,
+ #     inputs = gr.Textbox(lines = 10, placeholder = "Enter your question here 👉"),
+ #     outputs = gr.Textbox(lines = 10, placeholder = "Your answer will be here soon 🚀"),
+ #     title="Q&A with PDF 👩🏻‍💻📓✍🏻💡",
+ #     description="This app facilitates a conversation with PDFs available on https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdf 💡",
+ #     theme="soft",
+ #     examples=["Hello", "what is the speed of human nerve impulses?"],
+ #     # cache_examples=True,
+ # ).launch()
+
+
  import gradio as gr
+ from threading import Thread
+ from queue import SimpleQueue
+ from typing import Any, Dict, List, Union
+ from langchain.callbacks.base import BaseCallbackHandler
+ from langchain.schema import LLMResult
  from qdrant_client import models, QdrantClient
  from sentence_transformers import SentenceTransformer
  from PyPDF2 import PdfReader
  from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from qdrant_client.models import PointStruct
+ import os
  from langchain.callbacks.manager import CallbackManager
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
- # from langchain.llms import LlamaCpp
+ # from qdrant_client import QdrantClient
+ # from langchain import VectorDBQA - This is obsolete
+ from langchain.chains import RetrievalQA
+ from langchain.llms import LlamaCpp
+ # from PyPDF2 import PdfReader
  from langchain.vectorstores import Qdrant
+ # from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
+ from transformers import AutoModel
  from qdrant_client.http import models
- # from langchain.llms import CTransformers
+ # from sentence_transformers import SentenceTransformer
+ from langchain.prompts import PromptTemplate
  from ctransformers import AutoModelForCausalLM

-
-
-
-
  # loading the embedding model -

- encoder = SentenceTransformer('jinaai/jina-embedding-b-en-v1')
+ encoder = SentenceTransformer("all-MiniLM-L6-v2")

  print("embedding model loaded.............................")
  print("####################################################")
@@ -29,7 +206,9 @@ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
  print("loading the LLM......................................")

  # llm = LlamaCpp(
- #     model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
+ #     model_path="/home/devangpagare/llm/models/llama-2-7b-chat.Q3_K_S.gguf",
+ #     # n_gpu_layers=n_gpu_layers,
+ #     # n_batch=n_batch,
  #     n_ctx=2048,
  #     f16_kv=True,  # MUST set to True, otherwise you will run into problems after a couple of calls
  #     callback_manager=callback_manager,
@@ -37,17 +216,16 @@ print("loading the LLM......................................")
  # )

  llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
-     model_file="llama-2-7b-chat.Q8_0.gguf",
+     model_file="llama-2-7b-chat.Q3_K_S.gguf",
      model_type="llama",
      # config = ctransformers.hub.AutoConfig,
      # hf = True
      temperature = 0.2,
-     max_new_tokens = 1024,
-     stop = ['\n']
+     # max_new_tokens = 1024,
+     # stop = ['\n']
  )


-
  print("LLM loaded........................................")
  print("################################################################")

@@ -75,7 +253,7 @@ for page in range(num_of_pages):


  chunks = get_chunks(text)
-
+ print(chunks)
  print("Chunks are ready.....................................")
  print("######################################################")

@@ -95,11 +273,11 @@ print("Collection created........................................")
  print("#########################################################")


-
+ # starting a list of the same size as chunks
  li = []
  for i in range(len(chunks)):
      li.append(i)
-
+ # combining li and chunks to create a dictionary
  dic = zip(li, chunks)
  dic = dict(dic)

@@ -110,6 +288,8 @@ qdrant.upload_records(
              id=idx,
              vector=encoder.encode(dic[idx]).tolist(),
              payload= {dic[idx][:5] : dic[idx]}
+             ## the payload is always supposed to be a dictionary with both keys and values as strings, so the first 5 chars of
+             ## every value are used as the key to make the payload.
          ) for idx in dic.keys()
      ],
  )
@@ -128,8 +308,11 @@ def chat(question):
      )
      context = []
      for hit in hits:
+         # print(hit.payload, "score:", hit.score)
          context.append(list(hit.payload.values())[0])
-
+         # context += str(hit.payload[hit.payload.values()[:5]])
+         # print("##################################################################")
+
      context = context[0] + context[1] + context[2]

      system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
@@ -162,3 +345,5 @@ gr.Interface(
      examples=["Hello", "what is the speed of human nerve impulses?"],
      # cache_examples=True,
  ).launch()
+
+
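
Aside: the payload trick called out in the upload loop above can be seen in isolation in the sketch below. The chunk strings and the zero vector are made-up placeholders (the app uses encoder.encode(chunk), which is 384-dimensional for all-MiniLM-L6-v2); only the payload construction and read-back mirror the committed code.

    from qdrant_client import models

    chunks = ["The human body contains around 60,000 miles of blood vessels.",
              "Nerve impulses can travel at up to 100 metres per second."]
    dic = dict(zip(range(len(chunks)), chunks))  # {0: first chunk, 1: second chunk, ...}

    records = [
        models.Record(
            id=idx,
            vector=[0.0] * 384,                # placeholder embedding
            payload={dic[idx][:5]: dic[idx]},  # first 5 chars of the chunk act as the payload key
        )
        for idx in dic.keys()
    ]

    # chat() recovers the chunk text without needing to know the key:
    text = list(records[0].payload.values())[0]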