Spaces:
Runtime error
Runtime error
adding new Pinecone index to demo
Browse files
- app.py +17 -11
- retrieval.py +4 -4
app.py
CHANGED
@@ -5,6 +5,7 @@ import retrieval
|
|
5 |
# UNCOMMENT ONLY WHEN RUNNING LOCALLY (not on Spaces)
|
6 |
# from dotenv import load_dotenv
|
7 |
from text_generation import Client, InferenceAPIClient
|
|
|
8 |
|
9 |
# load API keys from globally-available .env file
|
10 |
# SECRETS_FILEPATH = "/mnt/project/chatbotai/huggingface_cache/internal_api_keys.env"
|
@@ -106,7 +107,7 @@ def predict(
|
|
106 |
stop_sequences=[user_name.rstrip(), assistant_name.rstrip()],
|
107 |
)
|
108 |
|
109 |
-
|
110 |
for i, response in enumerate(iterator):
|
111 |
if response.token.special:
|
112 |
continue
|
@@ -123,28 +124,33 @@ def predict(
|
|
123 |
history[-1] = partial_words
|
124 |
|
125 |
chat = [(history[i].strip(), history[i + 1].strip()) for i in range(0, len(history) - 1, 2)]
|
126 |
-
|
127 |
yield chat, history, None, None, None, []
|
128 |
|
129 |
-
|
130 |
-
cleaned_final_chat_response = []
|
131 |
-
for human_chat, bot_chat in final_chat_response:
|
132 |
-
human_chat = human_chat.replace("<br>", "")
|
133 |
-
human_chat = human_chat.replace("\n\n", "\n")
|
134 |
-
bot_chat = bot_chat.replace("<br>", "")
|
135 |
-
bot_chat = bot_chat.replace("\n\n", "\n")
|
136 |
-
cleaned_final_chat_response.append( (human_chat, bot_chat) )
|
137 |
-
|
138 |
# Pinecone context retrieval
|
139 |
top_context_list = ta.retrieve_contexts_from_pinecone(user_question=inputs, topk=NUM_ANSWERS_GENERATED)
|
140 |
# yield chat, history, top_context_list[0], top_context_list[1], top_context_list[2], []
|
141 |
yield cleaned_final_chat_response, history, top_context_list[0], top_context_list[1], top_context_list[2], []
|
|
|
|
|
142 |
|
143 |
# run CLIP
|
144 |
images_list = ta.clip_text_to_image(inputs)
|
145 |
# yield chat, history, top_context_list[0], top_context_list[1], top_context_list[2], images_list
|
146 |
yield cleaned_final_chat_response, history, top_context_list[0], top_context_list[1], top_context_list[2], images_list
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
def reset_textbox():
|
150 |
return gr.update(value="")
|
|
|
5 |
# UNCOMMENT ONLY WHEN RUNNING LOCALLY (not on Spaces)
|
6 |
# from dotenv import load_dotenv
|
7 |
from text_generation import Client, InferenceAPIClient
|
8 |
+
from typing import List, Tuple
|
9 |
|
10 |
# load API keys from globally-available .env file
|
11 |
# SECRETS_FILEPATH = "/mnt/project/chatbotai/huggingface_cache/internal_api_keys.env"
|
|
|
107 |
stop_sequences=[user_name.rstrip(), assistant_name.rstrip()],
|
108 |
)
|
109 |
|
110 |
+
chat_response = None
|
111 |
for i, response in enumerate(iterator):
|
112 |
if response.token.special:
|
113 |
continue
|
|
|
124 |
history[-1] = partial_words
|
125 |
|
126 |
chat = [(history[i].strip(), history[i + 1].strip()) for i in range(0, len(history) - 1, 2)]
|
127 |
+
chat_response = chat
|
128 |
yield chat, history, None, None, None, []
|
129 |
|
130 |
+
cleaned_final_chat_response = clean_chat_response(chat_response)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
# Pinecone context retrieval
|
132 |
top_context_list = ta.retrieve_contexts_from_pinecone(user_question=inputs, topk=NUM_ANSWERS_GENERATED)
|
133 |
# yield chat, history, top_context_list[0], top_context_list[1], top_context_list[2], []
|
134 |
yield cleaned_final_chat_response, history, top_context_list[0], top_context_list[1], top_context_list[2], []
|
135 |
+
|
136 |
+
cleaned_final_chat_response = clean_chat_response(chat_response)
|
137 |
|
138 |
# run CLIP
|
139 |
images_list = ta.clip_text_to_image(inputs)
|
140 |
# yield chat, history, top_context_list[0], top_context_list[1], top_context_list[2], images_list
|
141 |
yield cleaned_final_chat_response, history, top_context_list[0], top_context_list[1], top_context_list[2], images_list
|
142 |
|
143 |
+
def clean_chat_response(chat: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
144 |
+
''' Not perfect, but much better at removing all the crazy newlines. '''
|
145 |
+
cleaned_chat = []
|
146 |
+
for human_chat, bot_chat in chat:
|
147 |
+
# human_chat = human_chat.replace("<br>", "")
|
148 |
+
human_chat = human_chat.replace("\n\n", "\n")
|
149 |
+
# bot_chat = bot_chat.replace("<br>", "")
|
150 |
+
bot_chat = bot_chat.replace("\n\n", "\n")
|
151 |
+
cleaned_chat.append( (human_chat, bot_chat) )
|
152 |
+
return cleaned_chat
|
153 |
+
|
154 |
|
155 |
def reset_textbox():
|
156 |
return gr.update(value="")
|
retrieval.py
CHANGED
@@ -47,11 +47,11 @@ class Retrieval:
|
|
47 |
|
48 |
def _load_pinecone_vectorstore(self,):
|
49 |
model_name = "intfloat/e5-large" # best text embedding model. 1024 dims.
|
50 |
-
|
51 |
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
52 |
-
#pinecone.init(api_key=os.environ
|
53 |
-
pinecone.
|
54 |
-
|
|
|
55 |
|
56 |
self.vectorstore = Pinecone(index=pincecone_index, embedding_function=embeddings.embed_query, text_key="text")
|
57 |
|
|
|
47 |
|
48 |
def _load_pinecone_vectorstore(self,):
|
49 |
model_name = "intfloat/e5-large" # best text embedding model. 1024 dims.
|
|
|
50 |
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
51 |
+
# pinecone.init(api_key=os.environ.get('PINECONE_API_KEY'), environment="us-west1-gcp")
|
52 |
+
# pincecone_index = pinecone.Index("uiuc-chatbot")
|
53 |
+
pinecone.init(api_key=os.environ.get('PINECONE_API_KEY_NEW_ACCT'), environment="us-east4-gcp")
|
54 |
+
pincecone_index = pinecone.Index("uiuc-chatbot-deduped")
|
55 |
|
56 |
self.vectorstore = Pinecone(index=pincecone_index, embedding_function=embeddings.embed_query, text_key="text")
|
57 |
|