mohcineelharras committed on
Commit
8df831b
·
1 Parent(s): c89b497

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -60
app.py CHANGED
@@ -80,6 +80,7 @@ template = (
80
  "If a question is asked about content not in the documents or context, respond with 'I do not have that information.' "
81
  "Always respond in the same language as the question was asked. Be concise.\n"
82
  "Respond to the best of your ability. Try to respond in markdown.\"\n"
 
83
  "context\n"
84
  "{context}\n"
85
  "user\n"
@@ -90,30 +91,32 @@ template = (
90
 
91
  # --------------------------------cache LLM-----------------------------------
92
 
93
- logging.basicConfig(stream=sys.stdout, level=logging.INFO)
94
- logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
95
- llama_debug = LlamaDebugHandler(print_trace_on_end=True)
96
- callback_manager = CallbackManager([llama_debug])
97
-
98
  @st.cache_resource
99
- #One doc embedding
100
- def load_emb_uploaded_document(filename):
101
- # You may want to add a check to prevent execution during initialization.
102
- if 'init' in st.session_state:
103
- embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base")
104
- service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm, chunk_size=500)
105
- documents = SimpleDirectoryReader(input_files=[filename]).load_data()
106
- index = VectorStoreIndex.from_documents(
107
- documents, service_context=service_context, show_progress=True)
108
- return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
109
- return None
 
 
 
 
 
 
 
110
 
111
  # --------------------------------cache Embedding model-----------------------------------
112
 
113
  @st.cache_resource
114
  def load_emb_model():
115
  if not os.path.exists("data"):
116
- st.error("Data directory does not exist. Please upload the data.")
117
  os.makedirs("data")
118
  return None #
119
  embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base"
@@ -126,44 +129,44 @@ def load_emb_model():
126
  index = VectorStoreIndex.from_documents(
127
  documents, service_context=service_context, show_progress=True)
128
  return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
 
129
 
130
- # --------------------------------cache Embedding model-----------------------------------
 
 
 
 
 
131
 
132
- # LLM
133
  @st.cache_resource
134
- def load_llm_model():
135
- if not os.path.exists("models"):
136
- st.error("models directory does not exist. Please download and copy paste a model in folder models.")
137
- os.makedirs("models")
138
- return None #
139
- llm = LlamaCPP(
140
- #model_url="https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q5_K_M.gguf",
141
- model_path="models/dolphin-2.1-mistral-7b.Q4_K_S.gguf",
142
- temperature=0.0,
143
- max_new_tokens=100,
144
- context_window=4096,
145
- generate_kwargs={},
146
- model_kwargs={"n_gpu_layers": 20},
147
- verbose=True,
148
- )
149
- return llm
150
 
151
  # ------------------------------------session state----------------------------------------
152
 
153
  if 'memory' not in st.session_state:
154
  st.session_state.memory = ""
155
 
156
- # LLM Model Loading
157
- if 'llm_model' not in st.session_state:
158
- st.session_state.llm_model = load_llm_model()
159
- # Use the models from session state
160
- llm = st.session_state.llm_model
161
-
162
- # Embedding Model Loading
163
- if 'emb_model' not in st.session_state:
164
- st.session_state.emb_model = load_emb_model()
165
- # Use the models from session state
166
- query_engine = st.session_state.emb_model
167
 
168
  # ------------------------------------layout----------------------------------------
169
 
@@ -189,7 +192,7 @@ tab1, tab2, tab3 = st.tabs(["LLM only", "LLM RAG QA with database", "One single
189
 
190
  with tab1:
191
  st.title("💬 LLM only")
192
- prompt = st.text_input(
193
  "Ask your question here",
194
  placeholder="How do miners contribute to the security of the blockchain ?",
195
  )
@@ -208,7 +211,7 @@ with tab1:
208
  with tab2:
209
  st.title("💬 LLM RAG QA with database")
210
  st.write("To consult files that are available in the database, go to https://huggingface.co/spaces/mohcineelharras/llama-index-docs-spaces/tree/main/data")
211
- prompt = st.text_input(
212
  "Ask your question here",
213
  placeholder="Who is Mohcine ?",
214
  )
@@ -234,13 +237,12 @@ with tab2:
234
 
235
  with tab3:
236
  st.title("📝 One single document Q&A with Llama Index using local open llms")
237
- if st.button('Reinitialize Query Engine', key='reinit_engine'):
238
- del st.session_state["emb_model_upload_doc"]
239
- st.session_state.emb_model_upload_doc = ""
240
- st.write("Query engine reinitialized.")
241
 
242
  uploaded_file = st.file_uploader("Upload an File", type=("txt", "csv", "md","pdf"))
243
- question = st.text_input(
244
  "Ask something about the files",
245
  placeholder="Can you give me a short summary?",
246
  disabled=not uploaded_file,
@@ -251,22 +253,23 @@ with tab3:
251
 
252
  if uploaded_file:
253
  if not os.path.exists("draft_docs"):
254
- st.error("draft_docs directory does not exist. Please download and copy paste a model in folder models.")
255
  os.makedirs("draft_docs")
256
  with open("draft_docs/"+uploaded_file.name, "wb") as f:
257
  text = uploaded_file.read()
258
  f.write(text)
259
  text = uploaded_file.read()
260
  # Embedding Model Loading
261
- if 'emb_model_upload_doc' not in st.session_state:
262
- st.session_state.emb_model_upload_doc = load_emb_uploaded_document("draft_docs/"+uploaded_file.name)
263
- # Use the models from session state
264
- query_engine_upload_doc = st.session_state.emb_model_upload_doc
265
  # if load_emb_uploaded_document:
266
  # load_emb_uploaded_document.clear()
267
  #load_emb_uploaded_document.clear()
268
  st.write("File ",uploaded_file.name, "was loaded successfully")
269
-
 
 
 
 
 
270
  if uploaded_file and question and api_server_info:
271
  contextual_prompt = st.session_state.memory + "\n" + question
272
  response = query_engine_upload_doc.query(contextual_prompt)
@@ -288,7 +291,8 @@ with tab3:
288
  #st.write()
289
  #print("Is File uploaded : ",uploaded_file==True, "Is question asked : ", question==True, "Is question asked : ", api_server_info==True)
290
 
291
- st.subheader('⚠️ Warning: To avoid lags')
 
292
  st.markdown("Please consider **delete input prompt** and **clear memory** with the button on sidebar, each time you switch to another tab")
293
  st.markdown("If you've got a GPU locally, the execution could be a **lot faster** (approximately 5 seconds on my local machine).")
294
 
 
80
  "If a question is asked about content not in the documents or context, respond with 'I do not have that information.' "
81
  "Always respond in the same language as the question was asked. Be concise.\n"
82
  "Respond to the best of your ability. Try to respond in markdown.\"\n"
83
+ "If the user prompt is in French, YOU MUST ANSWER IN FRENCH. Otherwise, speak English\"\n"
84
  "context\n"
85
  "{context}\n"
86
  "user\n"
 
91
 
92
  # --------------------------------cache LLM-----------------------------------
93
 
94
# LLM
@st.cache_resource
def load_llm_model():
    """Create the local LlamaCPP model used by the app.

    Decorated with ``st.cache_resource``.

    Returns:
        A ``LlamaCPP`` instance configured for the GGUF file under
        ``models/``, or ``None`` when the ``models`` directory did not exist
        (in which case the empty directory is created first).
    """
    if os.path.exists("models"):
        # Alternative remote source, kept for reference:
        # model_url="https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q5_K_M.gguf"
        return LlamaCPP(
            model_path="models/dolphin-2.1-mistral-7b.Q4_K_S.gguf",
            temperature=0.0,
            max_new_tokens=100,
            context_window=4096,
            generate_kwargs={},
            model_kwargs={"n_gpu_layers": 20},
            verbose=True,
        )

    # No models directory yet: create it and signal "no model" to the caller.
    os.makedirs("models")
    return None
111
+
112
+ llm = load_llm_model()
113
+
114
 
115
  # --------------------------------cache Embedding model-----------------------------------
116
 
117
  @st.cache_resource
118
  def load_emb_model():
119
  if not os.path.exists("data"):
 
120
  os.makedirs("data")
121
  return None #
122
  embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base"
 
129
  index = VectorStoreIndex.from_documents(
130
  documents, service_context=service_context, show_progress=True)
131
  return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
132
+ query_engine = load_emb_model()
133
 
134
+ # --------------------------------cache embd one doc-----------------------------------
135
+
136
# Send INFO-level logs to stdout.
# basicConfig() already attaches a stdout StreamHandler when the root logger
# has no handlers, and this script is re-executed on every Streamlit
# interaction, so an unconditional addHandler() would attach one more stdout
# handler on each run and duplicate every log line. Only add the explicit
# handler when no handler writing to the current stdout is present.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
_root_logger = logging.getLogger()
if not any(
    isinstance(_handler, logging.StreamHandler)
    and getattr(_handler, "stream", None) is sys.stdout
    for _handler in _root_logger.handlers
):
    _root_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
138
# Debug handler that prints the trace when it ends, plus the callback
# manager that wraps it.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])


# One doc embedding
@st.cache_resource
def load_emb_uploaded_document(filename):
    """Build a query engine over a single uploaded document.

    Decorated with ``st.cache_resource``. Relies on the module-level names
    ``llm``, ``text_qa_template`` and ``refine_template``.

    Args:
        filename: Path of the file to read and index.

    Returns:
        A query engine created from a ``VectorStoreIndex`` over the file, or
        ``None`` when ``'init'`` is not present in ``st.session_state``.
    """
    # Guard against running during initialization.
    # NOTE(review): st.cache_resource presumably memoizes this None result
    # for the given filename as well - confirm that is intended.
    if 'init' not in st.session_state:
        return None

    instructor_embedder = InstructorEmbedding("models/hkunlp_instructor-base")
    context = ServiceContext.from_defaults(
        embed_model=instructor_embedder,
        llm=llm,
        chunk_size=500,
    )
    loaded_docs = SimpleDirectoryReader(input_files=[filename]).load_data()
    doc_index = VectorStoreIndex.from_documents(
        loaded_docs,
        service_context=context,
        show_progress=True,
    )
    return doc_index.as_query_engine(
        text_qa_template=text_qa_template,
        refine_template=refine_template,
    )
 
 
 
 
 
153
 
154
  # ------------------------------------session state----------------------------------------
155
 
156
  if 'memory' not in st.session_state:
157
  st.session_state.memory = ""
158
 
159
+ # # LLM Model Loading
160
+ # if 'llm_model' not in st.session_state:
161
+ # st.session_state.llm_model = load_llm_model()
162
+ # # Use the models from session state
163
+ # llm = st.session_state.llm_model
164
+
165
+ # # Embedding Model Loading
166
+ # if 'emb_model' not in st.session_state:
167
+ # st.session_state.emb_model = load_emb_model()
168
+ # # Use the models from session state
169
+ # query_engine = st.session_state.emb_model
170
 
171
  # ------------------------------------layout----------------------------------------
172
 
 
192
 
193
  with tab1:
194
  st.title("💬 LLM only")
195
+ prompt = st.text_area(
196
  "Ask your question here",
197
  placeholder="How do miners contribute to the security of the blockchain ?",
198
  )
 
211
  with tab2:
212
  st.title("💬 LLM RAG QA with database")
213
  st.write("To consult files that are available in the database, go to https://huggingface.co/spaces/mohcineelharras/llama-index-docs-spaces/tree/main/data")
214
+ prompt = st.text_area(
215
  "Ask your question here",
216
  placeholder="Who is Mohcine ?",
217
  )
 
237
 
238
  with tab3:
239
  st.title("📝 One single document Q&A with Llama Index using local open llms")
240
+ # if st.button('Reinitialize Query Engine', key='reinit_engine'):
241
+ # del query_engine_upload_doc
242
+ # st.write("Query engine reinitialized.")
 
243
 
244
  uploaded_file = st.file_uploader("Upload an File", type=("txt", "csv", "md","pdf"))
245
+ question = st.text_area(
246
  "Ask something about the files",
247
  placeholder="Can you give me a short summary?",
248
  disabled=not uploaded_file,
 
253
 
254
  if uploaded_file:
255
  if not os.path.exists("draft_docs"):
 
256
  os.makedirs("draft_docs")
257
  with open("draft_docs/"+uploaded_file.name, "wb") as f:
258
  text = uploaded_file.read()
259
  f.write(text)
260
  text = uploaded_file.read()
261
  # Embedding Model Loading
262
+ query_engine_upload_doc = load_emb_uploaded_document("draft_docs/"+uploaded_file.name)
 
 
 
263
  # if load_emb_uploaded_document:
264
  # load_emb_uploaded_document.clear()
265
  #load_emb_uploaded_document.clear()
266
  st.write("File ",uploaded_file.name, "was loaded successfully")
267
+ else:
268
+ try:
269
+ del query_engine_upload_doc
270
+ except:
271
+ pass
272
+
273
  if uploaded_file and question and api_server_info:
274
  contextual_prompt = st.session_state.memory + "\n" + question
275
  response = query_engine_upload_doc.query(contextual_prompt)
 
291
  #st.write()
292
  #print("Is File uploaded : ",uploaded_file==True, "Is question asked : ", question==True, "Is question asked : ", api_server_info==True)
293
 
294
+ st.subheader('⚠️ Warning: To avoid lags read carefully the steps below')
295
+ st.markdown("**ONE EXECUTION COULD TAKE UP TO 2 or 3 minutes because of hardware (0.9 token/second)**")
296
  st.markdown("Please consider **delete input prompt** and **clear memory** with the button on sidebar, each time you switch to another tab")
297
  st.markdown("If you've got a GPU locally, the execution could be a **lot faster** (approximately 5 seconds on my local machine).")
298