Shreyas094 committed (verified)
Commit bd7a464 · 1 Parent(s): 9ccaed2

Update app.py

Files changed (1): app.py (+52 −61)
app.py CHANGED
@@ -104,11 +104,6 @@ def update_vectors(files, parser):
             logging.warning(f"No chunks loaded from {file.name}")
             continue
         logging.info(f"Loaded {len(data)} chunks from {file.name}")
-
-        # Add source to metadata
-        for chunk in data:
-            chunk.metadata["source"] = file.name
-
         all_data.extend(data)
         total_chunks += len(data)
         if not any(doc["name"] == file.name for doc in uploaded_documents):
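
Note: this hunk drops the per-chunk source tagging, while get_response_from_pdf (last hunk below) still filters on doc.metadata["source"], so that key must now be set somewhere else, e.g. by the loader. For reference, a minimal sketch of the tagging pattern being removed, assuming LangChain-style Document objects; the file name is illustrative:

from langchain_core.documents import Document  # assumed import path

def tag_chunks_with_source(chunks: list[Document], file_name: str) -> list[Document]:
    # Record the originating file so downstream code can filter chunks by source.
    for chunk in chunks:
        chunk.metadata["source"] = file_name
    return chunks

# Usage: every chunk loaded from one file reports the same source.
chunks = tag_chunks_with_source([Document(page_content="hello")], "report.pdf")
assert chunks[0].metadata["source"] == "report.pdf"
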
@@ -184,52 +179,57 @@ def delete_documents(selected_docs):
 
     return f"Deleted documents: {', '.join(deleted_docs)}", display_documents()
 
-def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2):
-    print(f"Starting generate_chunked_response with model: {model}, num_calls: {num_calls}")
+def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
+    print(f"Starting generate_chunked_response with {num_calls} calls")
     full_response = ""
     messages = [{"role": "user", "content": prompt}]
 
     if model == "@cf/meta/llama-3.1-8b-instruct":
-        # Cloudflare API logic
-        headers = {
-            "Authorization": f"Bearer {API_TOKEN}",
-            "Content-Type": "application/json"
-        }
-
+        # Cloudflare API
         for i in range(num_calls):
             print(f"Starting Cloudflare API call {i+1}")
+            if should_stop:
+                print("Stop clicked, breaking loop")
+                break
             try:
                 response = requests.post(
                     f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct",
+                    headers={"Authorization": f"Bearer {API_TOKEN}"},
                     json={
-                        "messages": messages,
-                        "stream": True,
+                        "stream": True,
+                        "messages": [
+                            {"role": "system", "content": "You are a friendly assistant"},
+                            {"role": "user", "content": prompt}
+                        ],
                         "max_tokens": max_tokens,
                         "temperature": temperature
                     },
-                    headers=headers,
-                    stream=True
+                    stream=True
                 )
+
                 for line in response.iter_lines():
+                    if should_stop:
+                        print("Stop clicked during streaming, breaking")
+                        break
                     if line:
                         try:
                             json_data = json.loads(line.decode('utf-8').split('data: ')[1])
-                            chunk = json_data.get('response', '')
+                            chunk = json_data['response']
                             full_response += chunk
-                            yield full_response
-                        except (json.JSONDecodeError, IndexError):
+                        except json.JSONDecodeError:
                             continue
                 print(f"Cloudflare API call {i+1} completed")
             except Exception as e:
                 print(f"Error in generating response from Cloudflare: {str(e)}")
-                if i == num_calls - 1:  # If this is the last attempt
-                    yield full_response  # Yield whatever response we have so far
     else:
-        # Hugging Face API logic
+        # Original Hugging Face API logic
         client = InferenceClient(model, token=huggingface_token)
 
         for i in range(num_calls):
             print(f"Starting Hugging Face API call {i+1}")
+            if should_stop:
+                print("Stop clicked, breaking loop")
+                break
             try:
                 for message in client.chat_completion(
                     messages=messages,
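
Note: the new parser indexes split('data: ')[1], which raises IndexError on keep-alive or non-data lines, and the narrowed except clause (json.JSONDecodeError only) no longer catches that, so such a line escapes to the outer handler and aborts the whole call. A more defensive sketch of the same parsing, assuming Cloudflare's OpenAI-style SSE framing with the text under a 'response' key and a closing 'data: [DONE]' sentinel:

import json
from typing import Iterable, Iterator

def iter_sse_chunks(lines: Iterable[bytes]) -> Iterator[str]:
    # Yield text chunks from an SSE stream of `data: {...}` lines.
    for raw in lines:
        if not raw:
            continue  # blank lines are SSE event separators / keep-alives
        line = raw.decode("utf-8")
        if not line.startswith("data: "):
            continue  # skip comments and other non-data fields
        payload = line[len("data: "):]
        if payload.strip() == "[DONE]":
            break  # end-of-stream sentinel (OpenAI-style convention)
        try:
            yield json.loads(payload).get("response", "")
        except json.JSONDecodeError:
            continue  # skip partial or malformed frames rather than crash

# Usage with canned frames; a real call would pass response.iter_lines().
frames = [b'data: {"response": "Hel"}', b'', b'data: {"response": "lo"}', b'data: [DONE]']
print("".join(iter_sse_chunks(frames)))  # -> Hello
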
@@ -237,19 +237,16 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
                     temperature=temperature,
                     stream=True,
                 ):
+                    if should_stop:
+                        print("Stop clicked during streaming, breaking")
+                        break
                     if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                         chunk = message.choices[0].delta.content
                         full_response += chunk
-                        yield full_response
                 print(f"Hugging Face API call {i+1} completed")
             except Exception as e:
                 print(f"Error in generating response from Hugging Face: {str(e)}")
-                if i == num_calls - 1:  # If this is the last attempt
-                    yield full_response  # Yield whatever response we have so far
 
-    if not full_response:
-        yield "I apologize, but I couldn't generate a response at this time. Please try again later."
-
     # Clean up the response
     clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
     clean_response = clean_response.replace("Using the following context:", "").strip()
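
Note: should_stop arrives as a plain boolean defaulting to False and is evaluated once at call time, so nothing inside these loops can observe a later change; for the mid-stream checks to ever fire, the caller would need to pass a flag it can flip while generation runs. A sketch of that pattern with threading.Event (the event and its wiring are illustrative, not part of this commit):

import threading
import time

stop_event = threading.Event()  # shared flag; a UI stop button could set() it

def generate(stop: threading.Event):
    for i in range(100):
        if stop.is_set():        # checked between chunks, like the loops above
            print("stopped early at chunk", i)
            return
        time.sleep(0.01)         # stands in for receiving one streamed chunk
    print("finished")

worker = threading.Thread(target=generate, args=(stop_event,))
worker.start()
time.sleep(0.05)
stop_event.set()                 # request cancellation mid-stream
worker.join()
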
@@ -270,7 +267,7 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
     final_response = '\n\n'.join(unique_paragraphs)
 
     print(f"Final clean response: {final_response[:100]}...")
-    yield final_response
+    return final_response
 
 def duckduckgo_search(query):
     with DDGS() as ddgs:
@@ -308,16 +305,19 @@ def retry_last_response(history, use_web_search, model, temperature, num_calls):
 
     return chatbot_interface(last_user_msg, history, use_web_search, model, temperature, num_calls)
 
-def respond(message, history, use_web_search, model, temperature, num_calls, selected_docs):
+def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
     logging.info(f"User Query: {message}")
     logging.info(f"Model Used: {model}")
     logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
+
     logging.info(f"Selected Documents: {selected_docs}")
 
     try:
         if use_web_search:
             for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
                 response = f"{main_content}\n\n{sources}"
+                first_line = response.split('\n')[0] if response else ''
+                # logging.info(f"Generated Response (first line): {first_line}")
                 yield response
         else:
             embed = get_embeddings()
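
Note: the respond signature reorder (model/temperature/num_calls moved ahead of use_web_search) only matters to callers that pass these positionally. One plausible consumer is a Gradio ChatInterface, which invokes its fn as fn(message, history, *additional_inputs) in list order; the wiring below is an illustrative sketch under that assumption, not code from this commit:

import gradio as gr

MODELS = ["@cf/meta/llama-3.1-8b-instruct", "mistralai/Mistral-7B-Instruct-v0.3"]  # illustrative

def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
    yield f"[{model}] echo: {message}"  # stub body for the sketch

demo = gr.ChatInterface(
    respond,  # called as respond(message, history, *additional_inputs), in list order
    additional_inputs=[
        gr.Dropdown(choices=MODELS, label="Select Model"),
        gr.Slider(0, 1, value=0.2, label="Temperature"),
        gr.Slider(1, 5, value=1, step=1, label="Number of API Calls"),
        gr.Checkbox(label="Use Web Search"),
        gr.CheckboxGroup(choices=[], label="Select Documents"),
    ],
)

if __name__ == "__main__":
    demo.launch()
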
@@ -335,26 +335,28 @@ def respond(message, history, use_web_search, model, temperature, num_calls, sel
 
                 context_str = "\n".join([doc.page_content for doc in relevant_docs])
             else:
+                context_str = "No documents available."
                 yield "No documents available. Please upload PDF documents to answer questions."
                 return
 
             if model == "@cf/meta/llama-3.1-8b-instruct":
                 # Use Cloudflare API
-                prompt = f"""Using the following context from the PDF documents:
-{context_str}
-Write a detailed and complete response that answers the following user question: '{message}'"""
-                for partial_response in generate_chunked_response(prompt, model, num_calls=num_calls, temperature=temperature):
+                for partial_response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="pdf"):
+                    first_line = partial_response.split('\n')[0] if partial_response else ''
+                    # logging.info(f"Generated Response (first line): {first_line}")
                     yield partial_response
             else:
                 # Use Hugging Face API
                 for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
+                    first_line = partial_response.split('\n')[0] if partial_response else ''
+                    # logging.info(f"Generated Response (first line): {first_line}")
                     yield partial_response
     except Exception as e:
         logging.error(f"Error with {model}: {str(e)}")
         if "microsoft/Phi-3-mini-4k-instruct" in model:
             logging.info("Falling back to Mistral model due to Phi-3 error")
             fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
-            yield from respond(message, history, use_web_search, fallback_model, temperature, num_calls, selected_docs)
+            yield from respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
         else:
             yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
 
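
Note: the corrected fallback call passes arguments in the new order. Stripped to its core, the pattern is a generator delegating to a second call of itself with the fallback model; a self-contained sketch with the failure simulated:

def answer(message, model="microsoft/Phi-3-mini-4k-instruct"):
    # On failure, hand the stream over to a fresh call with the fallback model.
    try:
        if "Phi-3" in model:
            raise RuntimeError("simulated Phi-3 failure")
        yield f"{model}: response to {message!r}"
    except RuntimeError:
        yield from answer(message, model="mistralai/Mistral-7B-Instruct-v0.3")

print(list(answer("hi")))  # -> ["mistralai/Mistral-7B-Instruct-v0.3: response to 'hi'"]
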
@@ -458,36 +460,25 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
         yield "No documents available. Please upload PDF documents to answer questions."
         return
 
-    # Log all documents in the database
-    all_docs = list(database.docstore._dict.values())
-    logging.info(f"All documents in database: {[doc.metadata['source'] for doc in all_docs]}")
-    logging.info(f"Number of documents in database: {len(all_docs)}")
-
-    # Get only the selected documents
-    selected_docs_content = [doc for doc in all_docs if doc.metadata["source"] in selected_docs]
-    logging.info(f"Number of selected documents: {len(selected_docs_content)}")
-    logging.info(f"Selected documents: {[doc.metadata['source'] for doc in selected_docs_content]}")
-
-    if not selected_docs_content:
-        logging.warning(f"No documents found for the selected sources: {selected_docs}")
-        yield "No documents found for the selected sources. Please check your document selection."
-        return
-
-    # Perform similarity search on selected documents
-    relevant_docs = database.similarity_search(query, k=5, filter=lambda doc: doc.metadata["source"] in selected_docs)
-    logging.info(f"Number of relevant documents after similarity search: {len(relevant_docs)}")
-    logging.info(f"Relevant documents: {[doc.metadata['source'] for doc in relevant_docs]}")
-
-    if not relevant_docs:
-        logging.warning("No relevant documents found after similarity search")
-        yield "No relevant information found in the selected documents. Please try rephrasing your query."
+    retriever = database.as_retriever()
+    logging.info(f"Retrieving relevant documents for query: {query}")
+    relevant_docs = retriever.get_relevant_documents(query)
+    logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
+
+    # Filter relevant_docs based on selected documents
+    filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
+    logging.info(f"Number of filtered documents: {len(filtered_docs)}")
+
+    if not filtered_docs:
+        logging.warning(f"No relevant information found in the selected documents: {selected_docs}")
+        yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
         return
 
-    for doc in relevant_docs:
+    for doc in filtered_docs:
         logging.info(f"Document source: {doc.metadata['source']}")
         logging.info(f"Document content preview: {doc.page_content[:100]}...")  # Log first 100 characters of each document
 
-    context_str = "\n".join([doc.page_content for doc in relevant_docs])
+    context_str = "\n".join([doc.page_content for doc in filtered_docs])
     logging.info(f"Total context length: {len(context_str)}")
 
     if model == "@cf/meta/llama-3.1-8b-instruct":
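
Note on the last hunk: retrieving a global top-k first and filtering by source afterwards can come back empty even when the selected files do contain relevant chunks, if chunks from other files dominate the ranking; over-fetching (e.g. database.as_retriever(search_kwargs={"k": 20}), k illustrative) softens that. The filter step itself, sketched with .get() so chunks that were never tagged with a source (see the first hunk) are skipped instead of raising KeyError:

from langchain_core.documents import Document  # assumed import path

def filter_by_source(docs, selected_sources):
    # Keep only chunks whose metadata marks them as coming from a selected file.
    return [d for d in docs if d.metadata.get("source") in selected_sources]

hits = [
    Document(page_content="alpha", metadata={"source": "a.pdf"}),
    Document(page_content="beta", metadata={"source": "b.pdf"}),
    Document(page_content="gamma"),  # untagged chunk: skipped, not a KeyError
]
print([d.page_content for d in filter_by_source(hits, {"a.pdf"})])  # -> ['alpha']
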